diff --git a/GUI_cal_metrics.py b/GUI_cal_metrics.py
index 88a5133..2df36e3 100644
--- a/GUI_cal_metrics.py
+++ b/GUI_cal_metrics.py
@@ -63,6 +63,11 @@ def run_program():
         args.append("--verbose")
     if topclass_checkbox.value:
         args.append("--topclass")
+    if hl_test_validation_checkbox.value:
+        if not any(metric in selected_metrics for metric in ["HL-H", "HL-C"]):
+            ui.notify("Error: HL test validation requires either HL-H or HL-C metric to be selected.", type="error")
+            return
+        args.append("--hl_test_validation")
 
     command = ["python", "cal_metrics.py"] + args
     print("Running command:", " ".join(command))
@@ -110,7 +115,6 @@ def clear_cache():
         }
         clearCache();
     ''')
-    ui.notify('Browser cache cleared')
 
     plot_image.clear()
     plot_image.set_source(None)  # Set the image source to None
@@ -159,6 +163,7 @@ async def pick_file() -> None:
                                              value=1, step=1)
         num_bins_input = ui.number(label='Number of Bins for ECE/MCE/HL Test',
                                    value=10, min=2, step=1)
+        hl_test_validation_checkbox = ui.checkbox('HL Test Validation set', value=False)
 
     with ui.column().classes('w-1/3 p-4'):
         ui.label('Output Paths:')
diff --git a/cal_metrics.py b/cal_metrics.py
index d3842b3..dc19985 100755
--- a/cal_metrics.py
+++ b/cal_metrics.py
@@ -21,7 +21,10 @@ def perform_calculation(probs, labels, args, suffix=""):
     cal_metrics = CalibrationMetrics(
         class_to_calculate=args.class_to_calculate, num_bins=args.num_bins
     )
-
+    if args.hl_test_validation:
+        df = args.num_bins
+    else:
+        df = args.num_bins - 2
     metrics_to_calculate = args.metrics.split(",") if args.metrics else ["all"]
     if metrics_to_calculate == ["all"]:
         metrics_to_calculate = "all"
@@ -30,6 +33,7 @@ def perform_calculation(probs, labels, args, suffix=""):
         y_proba=probs,
         metrics=metrics_to_calculate,
         perform_pervalance_adjustment=args.prevalence_adjustment,
+        df = df
     )
 
     keys = list(result.keys())
@@ -42,6 +46,7 @@ def perform_calculation(probs, labels, args, suffix=""):
             n_samples=args.n_bootstrap,
             metrics=metrics_to_calculate,
             perform_pervalance_adjustment=args.prevalence_adjustment,
+            df = df
         )
         CI = get_CI(bootstrap_results)
         result = np.vstack((result, np.array(list(CI.values())).T))
@@ -214,6 +219,12 @@ def main():
         default=10,
         help="Number of bins for ECE/MCE/HL calculations (default: 10)",
     )
+    parser.add_argument(
+        "--hl_test_validation",
+        default=False,
+        action="store_true",
+        help="Using nbin instead of nbin-2 as HL test DOF. Use it if the dataset is validation set.",
+    )
     parser.add_argument(
         "--topclass",
         default=False,
diff --git a/calzone/metrics.py b/calzone/metrics.py
index ad5211c..0a9b0fb 100755
--- a/calzone/metrics.py
+++ b/calzone/metrics.py
@@ -16,7 +16,7 @@
 import numpy.lib.recfunctions as rf
 import contextlib
 
-def hosmer_lemeshow_test(reliability, confidence, bin_count, df=None):
+def hosmer_lemeshow_test(reliability, confidence, bin_count, df=None, **kwargs):
     """
     Compute the Hosmer-Lemeshow test for goodness of fit.
 
@@ -546,7 +546,7 @@ def calculate_metrics(
                     results["MCE-H"] = mce_h_class
                 elif metric == "HL-H":
                     hl_h_score, hl_h, _ = hosmer_lemeshow_test(
-                        acc_H_class, confidence_H_class, bin_count_H_class
+                        acc_H_class, confidence_H_class, bin_count_H_class, **kwargs
                     )
                     results["HL-H score"] = hl_h_score
                     results["HL-H p-value"] = hl_h
@@ -591,7 +591,7 @@ def calculate_metrics(
                     results["MCE-C"] = mce_c_class
                 elif metric == "HL-C":
                     hl_c_score, hl_c, _ = hosmer_lemeshow_test(
-                        acc_C_class, confidence_C_class, bin_count_C_class
+                        acc_C_class, confidence_C_class, bin_count_C_class, **kwargs
                     )
                     results["HL-C score"] = hl_c_score
                     results["HL-C p-value"] = hl_c
diff --git a/docs/build/doctrees/calzone.doctree b/docs/build/doctrees/calzone.doctree
index e4dbaf6..4a0488b 100644
Binary files a/docs/build/doctrees/calzone.doctree and b/docs/build/doctrees/calzone.doctree differ
diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle
index 2967dde..124a11a 100644
Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ
diff --git a/docs/build/doctrees/nbsphinx/notebooks/hl_test.ipynb b/docs/build/doctrees/nbsphinx/notebooks/hl_test.ipynb
index ec53edd..41e8204 100644
--- a/docs/build/doctrees/nbsphinx/notebooks/hl_test.ipynb
+++ b/docs/build/doctrees/nbsphinx/notebooks/hl_test.ipynb
@@ -20,7 +20,7 @@
     "\\text{HL} = \\sum_{m=1}^{M} \\left[\\frac{(O_{1,m}-E_{1,m})^2}{E_{1,m}} + \\frac{(O_{0,m}-E_{0,m})^2}{E_{0,m}}\\right]  = \\sum_{m=1}^{M} \\frac{(O_{1,m}-E_{1,m})^2}{E_{1,m}(1-\\frac{E_{1,m}}{N_m})} \\sim \\chi^2_{M-2}\n",
     "$$\n",
     "\n",
-    "where $E_{1,m}$ is the expected number of class 1 events in the $\\text{m}^{th}$ bin, $O_{1,m}$ is the observed number of class 1 events in the $\\text{m}^{th}$ bin, $N_m$ is the total number of observations in the $\\text{m}^{th}$ bin, and $M$ is the number of bins. The HL test statistic is distributed as a chi-squared distribution with $M-2$ degrees of freedom. We can then use this test statistic to calculate the p-value for the test and determine whether we can reject the null hypothesis that the model is well-calibrated. (Notice that the degree of freedom of HL test is $M-2$ by default but some literature suggests that the degree of freedom should be $M$ instead when the samples is not used for training. We could not find a proof to this statement and we provides the option to specify the degree of freedom in the `calzone`. The default value is still $M-2$.)"
+    "where $E_{1,m}$ is the expected number of class 1 events in the $\\text{m}^{th}$ bin, $O_{1,m}$ is the observed number of class 1 events in the $\\text{m}^{th}$ bin, $N_m$ is the total number of observations in the $\\text{m}^{th}$ bin, and $M$ is the number of bins. The HL test statistic is distributed as a chi-squared distribution with $M-2$ degrees of freedom. We can then use this test statistic to calculate the p-value for the test and determine whether we can reject the null hypothesis that the model is well-calibrated. Note that the HL test uses $M-2$ degrees of freedom by default, but some literature suggests that the degrees of freedom should be $M$ instead when the samples were not used for training. We provide the option to specify the degrees of freedom in `calzone`. The default value is still $M-2$."
    ]
   },
   {
@@ -280,7 +280,54 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We can see that both the equal-width and the equal-count method have the incorrect size."
+    "We can see that both the equal-width and the equal-count methods have the incorrect size. This supports the claim that the degrees of freedom should be M instead of M-2, which we demonstrate with the simulation below. We do not prove the claim here since it is beyond the scope of this documentation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The size of HL-H with df=M is : 0.047\n",
+      "The size of HL-C with df=M  is : 0.055\n"
+     ]
+    }
+   ],
+   "source": [
+    "### The size of HL Test\n",
+    "from calzone.utils import fake_binary_data_generator\n",
+    "from importlib import reload\n",
+    "import calzone.metrics\n",
+    "reload(calzone.metrics)\n",
+    "from calzone.metrics import CalibrationMetrics\n",
+    "np.random.seed(123)\n",
+    "fakedata_generator = fake_binary_data_generator(alpha_val=0.5, beta_val=0.5)\n",
+    "cal_metrics = CalibrationMetrics()\n",
+    "sample_size = 1000\n",
+    "simulation_size = 10000\n",
+    "results = []\n",
+    "# generate data\n",
+    "for i in range(simulation_size):\n",
+    "    X, y = fakedata_generator.generate_data(sample_size)\n",
+    "    if i == 0:\n",
+    "        tempresult = cal_metrics.calculate_metrics(y, X, ['HL-H', 'HL-C'],return_numpy=False, df = 10)\n",
+    "        keys = list(tempresult.keys())\n",
+    "        results.append(np.array(list(tempresult.values())))\n",
+    "    else:\n",
+    "        tempresult = cal_metrics.calculate_metrics(y, X, ['HL-H', 'HL-C'],return_numpy=True, df = 10)\n",
+    "        results.append(tempresult)\n",
+    "results = np.array(results)\n",
+    "\n",
+    "hl_h_pvalue = results[:,1]\n",
+    "hl_c_pvalue = results[:,3]\n",
+    "size_h = np.mean(hl_h_pvalue < 0.05)\n",
+    "size_c = np.mean(hl_c_pvalue < 0.05)\n",
+    "print(\"The size of HL-H with df=M is :\", round(size_h,3))\n",
+    "print(\"The size of HL-C with df=M  is :\", round(size_c,3))"
    ]
   },
   {
diff --git a/docs/build/doctrees/nbsphinx/notebooks/quickstart.ipynb b/docs/build/doctrees/nbsphinx/notebooks/quickstart.ipynb
index 1e5b953..63d7bc1 100644
--- a/docs/build/doctrees/nbsphinx/notebooks/quickstart.ipynb
+++ b/docs/build/doctrees/nbsphinx/notebooks/quickstart.ipynb
@@ -184,7 +184,8 @@
       "                      [--prevalence_adjustment] [--n_bootstrap N_BOOTSTRAP]\n",
       "                      [--bootstrap_ci BOOTSTRAP_CI]\n",
       "                      [--class_to_calculate CLASS_TO_CALCULATE]\n",
-      "                      [--num_bins NUM_BINS] [--topclass]\n",
+      "                      [--num_bins NUM_BINS]\n",
+      "                      [--hl_test_validation HL_TEST_VALIDATION] [--topclass]\n",
       "                      [--save_metrics SAVE_METRICS] [--plot]\n",
       "                      [--plot_bins PLOT_BINS] [--save_plot SAVE_PLOT]\n",
       "                      [--save_diagram_output SAVE_DIAGRAM_OUTPUT] [--verbose]\n",
@@ -211,6 +212,9 @@
       "                        Class to calculate metrics for (default: 1)\n",
       "  --num_bins NUM_BINS   Number of bins for ECE/MCE/HL calculations (default:\n",
       "                        10)\n",
+      "  --hl_test_validation HL_TEST_VALIDATION\n",
+      "                        Using nbin instead of nbin-2 as HL test DOF. Use it if\n",
+      "                        the dataset is validation set.\n",
       "  --topclass            Whether to transform the problem to top-class problem.\n",
       "  --save_metrics SAVE_METRICS\n",
       "                        Save the metrics to a csv file\n",
@@ -218,7 +222,8 @@
       "  --plot_bins PLOT_BINS\n",
       "                        Number of bins for reliability diagram\n",
       "  --save_plot SAVE_PLOT\n",
-      "                        Save the plot to a file\n",
+      "                        Save the plot to a file. Must end with valid image\n",
+      "                        formats.\n",
       "  --save_diagram_output SAVE_DIAGRAM_OUTPUT\n",
       "                        Save the reliability diagram output to a file\n",
       "  --verbose             Print verbose output\n"
@@ -290,7 +295,8 @@
     "--verbose \\\n",
     "--save_diagram_output '../../../example_data/simulated_welldata_diagram_output.csv' \n",
     "### save_diagram_output only when you want to save the reliability diagram output\n",
-    "#--prevalence_adjustment # only when you want to apply prevalence adjustment"
+    "#--prevalence_adjustment # only when you want to apply prevalence adjustment\n",
+    "#--hl_test_validation #use it only when the data is from validation set"
    ]
   },
   {
diff --git a/docs/build/doctrees/notebooks/hl_test.doctree b/docs/build/doctrees/notebooks/hl_test.doctree
index b34a697..d071253 100644
Binary files a/docs/build/doctrees/notebooks/hl_test.doctree and b/docs/build/doctrees/notebooks/hl_test.doctree differ
diff --git a/docs/build/doctrees/notebooks/quickstart.doctree b/docs/build/doctrees/notebooks/quickstart.doctree
index 2c9f318..7b5c69f 100644
Binary files a/docs/build/doctrees/notebooks/quickstart.doctree and b/docs/build/doctrees/notebooks/quickstart.doctree differ
diff --git a/docs/build/html/_modules/calzone/metrics.html b/docs/build/html/_modules/calzone/metrics.html
index 4bc7441..bee4c2d 100644
--- a/docs/build/html/_modules/calzone/metrics.html
+++ b/docs/build/html/_modules/calzone/metrics.html
@@ -105,7 +105,7 @@ <h1>Source code for calzone.metrics</h1><div class="highlight"><pre>
 
 <div class="viewcode-block" id="hosmer_lemeshow_test">
 <a class="viewcode-back" href="../../calzone.html#calzone.metrics.hosmer_lemeshow_test">[docs]</a>
-<span class="k">def</span> <span class="nf">hosmer_lemeshow_test</span><span class="p">(</span><span class="n">reliability</span><span class="p">,</span> <span class="n">confidence</span><span class="p">,</span> <span class="n">bin_count</span><span class="p">,</span> <span class="n">df</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+<span class="k">def</span> <span class="nf">hosmer_lemeshow_test</span><span class="p">(</span><span class="n">reliability</span><span class="p">,</span> <span class="n">confidence</span><span class="p">,</span> <span class="n">bin_count</span><span class="p">,</span> <span class="n">df</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
 <span class="sd">    Compute the Hosmer-Lemeshow test for goodness of fit.</span>
 
@@ -667,7 +667,7 @@ <h1>Source code for calzone.metrics</h1><div class="highlight"><pre>
                     <span class="n">results</span><span class="p">[</span><span class="s2">&quot;MCE-H&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">mce_h_class</span>
                 <span class="k">elif</span> <span class="n">metric</span> <span class="o">==</span> <span class="s2">&quot;HL-H&quot;</span><span class="p">:</span>
                     <span class="n">hl_h_score</span><span class="p">,</span> <span class="n">hl_h</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">hosmer_lemeshow_test</span><span class="p">(</span>
-                        <span class="n">acc_H_class</span><span class="p">,</span> <span class="n">confidence_H_class</span><span class="p">,</span> <span class="n">bin_count_H_class</span>
+                        <span class="n">acc_H_class</span><span class="p">,</span> <span class="n">confidence_H_class</span><span class="p">,</span> <span class="n">bin_count_H_class</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span>
                     <span class="p">)</span>
                     <span class="n">results</span><span class="p">[</span><span class="s2">&quot;HL-H score&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">hl_h_score</span>
                     <span class="n">results</span><span class="p">[</span><span class="s2">&quot;HL-H p-value&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">hl_h</span>
@@ -712,7 +712,7 @@ <h1>Source code for calzone.metrics</h1><div class="highlight"><pre>
                     <span class="n">results</span><span class="p">[</span><span class="s2">&quot;MCE-C&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">mce_c_class</span>
                 <span class="k">elif</span> <span class="n">metric</span> <span class="o">==</span> <span class="s2">&quot;HL-C&quot;</span><span class="p">:</span>
                     <span class="n">hl_c_score</span><span class="p">,</span> <span class="n">hl_c</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">hosmer_lemeshow_test</span><span class="p">(</span>
-                        <span class="n">acc_C_class</span><span class="p">,</span> <span class="n">confidence_C_class</span><span class="p">,</span> <span class="n">bin_count_C_class</span>
+                        <span class="n">acc_C_class</span><span class="p">,</span> <span class="n">confidence_C_class</span><span class="p">,</span> <span class="n">bin_count_C_class</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span>
                     <span class="p">)</span>
                     <span class="n">results</span><span class="p">[</span><span class="s2">&quot;HL-C score&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">hl_c_score</span>
                     <span class="n">results</span><span class="p">[</span><span class="s2">&quot;HL-C p-value&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">hl_c</span>
diff --git a/docs/build/html/_sources/notebooks/hl_test.ipynb.txt b/docs/build/html/_sources/notebooks/hl_test.ipynb.txt
index ec53edd..41e8204 100644
--- a/docs/build/html/_sources/notebooks/hl_test.ipynb.txt
+++ b/docs/build/html/_sources/notebooks/hl_test.ipynb.txt
@@ -20,7 +20,7 @@
     "\\text{HL} = \\sum_{m=1}^{M} \\left[\\frac{(O_{1,m}-E_{1,m})^2}{E_{1,m}} + \\frac{(O_{0,m}-E_{0,m})^2}{E_{0,m}}\\right]  = \\sum_{m=1}^{M} \\frac{(O_{1,m}-E_{1,m})^2}{E_{1,m}(1-\\frac{E_{1,m}}{N_m})} \\sim \\chi^2_{M-2}\n",
     "$$\n",
     "\n",
-    "where $E_{1,m}$ is the expected number of class 1 events in the $\\text{m}^{th}$ bin, $O_{1,m}$ is the observed number of class 1 events in the $\\text{m}^{th}$ bin, $N_m$ is the total number of observations in the $\\text{m}^{th}$ bin, and $M$ is the number of bins. The HL test statistic is distributed as a chi-squared distribution with $M-2$ degrees of freedom. We can then use this test statistic to calculate the p-value for the test and determine whether we can reject the null hypothesis that the model is well-calibrated. (Notice that the degree of freedom of HL test is $M-2$ by default but some literature suggests that the degree of freedom should be $M$ instead when the samples is not used for training. We could not find a proof to this statement and we provides the option to specify the degree of freedom in the `calzone`. The default value is still $M-2$.)"
+    "where $E_{1,m}$ is the expected number of class 1 events in the $\\text{m}^{th}$ bin, $O_{1,m}$ is the observed number of class 1 events in the $\\text{m}^{th}$ bin, $N_m$ is the total number of observations in the $\\text{m}^{th}$ bin, and $M$ is the number of bins. The HL test statistic is distributed as a chi-squared distribution with $M-2$ degrees of freedom. We can then use this test statistic to calculate the p-value for the test and determine whether we can reject the null hypothesis that the model is well-calibrated. Note that the HL test uses $M-2$ degrees of freedom by default, but some literature suggests that the degrees of freedom should be $M$ instead when the samples were not used for training. We provide the option to specify the degrees of freedom in `calzone`. The default value is still $M-2$."
    ]
   },
   {
@@ -280,7 +280,54 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We can see that both the equal-width and the equal-count method have the incorrect size."
+    "We can see that both the equal-width and the equal-count methods have the incorrect size. This supports the claim that the degrees of freedom should be M instead of M-2, which we demonstrate with the simulation below. We do not prove the claim here since it is beyond the scope of this documentation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The size of HL-H with df=M is : 0.047\n",
+      "The size of HL-C with df=M  is : 0.055\n"
+     ]
+    }
+   ],
+   "source": [
+    "### The size of HL Test\n",
+    "from calzone.utils import fake_binary_data_generator\n",
+    "from importlib import reload\n",
+    "import calzone.metrics\n",
+    "reload(calzone.metrics)\n",
+    "from calzone.metrics import CalibrationMetrics\n",
+    "np.random.seed(123)\n",
+    "fakedata_generator = fake_binary_data_generator(alpha_val=0.5, beta_val=0.5)\n",
+    "cal_metrics = CalibrationMetrics()\n",
+    "sample_size = 1000\n",
+    "simulation_size = 10000\n",
+    "results = []\n",
+    "# generate data\n",
+    "for i in range(simulation_size):\n",
+    "    X, y = fakedata_generator.generate_data(sample_size)\n",
+    "    if i == 0:\n",
+    "        tempresult = cal_metrics.calculate_metrics(y, X, ['HL-H', 'HL-C'],return_numpy=False, df = 10)\n",
+    "        keys = list(tempresult.keys())\n",
+    "        results.append(np.array(list(tempresult.values())))\n",
+    "    else:\n",
+    "        tempresult = cal_metrics.calculate_metrics(y, X, ['HL-H', 'HL-C'],return_numpy=True, df = 10)\n",
+    "        results.append(tempresult)\n",
+    "results = np.array(results)\n",
+    "\n",
+    "hl_h_pvalue = results[:,1]\n",
+    "hl_c_pvalue = results[:,3]\n",
+    "size_h = np.mean(hl_h_pvalue < 0.05)\n",
+    "size_c = np.mean(hl_c_pvalue < 0.05)\n",
+    "print(\"The size of HL-H with df=M is :\", round(size_h,3))\n",
+    "print(\"The size of HL-C with df=M  is :\", round(size_c,3))"
    ]
   },
   {
diff --git a/docs/build/html/_sources/notebooks/quickstart.ipynb.txt b/docs/build/html/_sources/notebooks/quickstart.ipynb.txt
index 1e5b953..63d7bc1 100644
--- a/docs/build/html/_sources/notebooks/quickstart.ipynb.txt
+++ b/docs/build/html/_sources/notebooks/quickstart.ipynb.txt
@@ -184,7 +184,8 @@
       "                      [--prevalence_adjustment] [--n_bootstrap N_BOOTSTRAP]\n",
       "                      [--bootstrap_ci BOOTSTRAP_CI]\n",
       "                      [--class_to_calculate CLASS_TO_CALCULATE]\n",
-      "                      [--num_bins NUM_BINS] [--topclass]\n",
+      "                      [--num_bins NUM_BINS]\n",
+      "                      [--hl_test_validation HL_TEST_VALIDATION] [--topclass]\n",
       "                      [--save_metrics SAVE_METRICS] [--plot]\n",
       "                      [--plot_bins PLOT_BINS] [--save_plot SAVE_PLOT]\n",
       "                      [--save_diagram_output SAVE_DIAGRAM_OUTPUT] [--verbose]\n",
@@ -211,6 +212,9 @@
       "                        Class to calculate metrics for (default: 1)\n",
       "  --num_bins NUM_BINS   Number of bins for ECE/MCE/HL calculations (default:\n",
       "                        10)\n",
+      "  --hl_test_validation HL_TEST_VALIDATION\n",
+      "                        Using nbin instead of nbin-2 as HL test DOF. Use it if\n",
+      "                        the dataset is validation set.\n",
       "  --topclass            Whether to transform the problem to top-class problem.\n",
       "  --save_metrics SAVE_METRICS\n",
       "                        Save the metrics to a csv file\n",
@@ -218,7 +222,8 @@
       "  --plot_bins PLOT_BINS\n",
       "                        Number of bins for reliability diagram\n",
       "  --save_plot SAVE_PLOT\n",
-      "                        Save the plot to a file\n",
+      "                        Save the plot to a file. Must end with valid image\n",
+      "                        formats.\n",
       "  --save_diagram_output SAVE_DIAGRAM_OUTPUT\n",
       "                        Save the reliability diagram output to a file\n",
       "  --verbose             Print verbose output\n"
@@ -290,7 +295,8 @@
     "--verbose \\\n",
     "--save_diagram_output '../../../example_data/simulated_welldata_diagram_output.csv' \n",
     "### save_diagram_output only when you want to save the reliability diagram output\n",
-    "#--prevalence_adjustment # only when you want to apply prevalence adjustment"
+    "#--prevalence_adjustment # only when you want to apply prevalence adjustment\n",
+    "#--hl_test_validation #use it only when the data is from validation set"
    ]
   },
   {
diff --git a/docs/build/html/calzone.html b/docs/build/html/calzone.html
index 39c487b..707b549 100644
--- a/docs/build/html/calzone.html
+++ b/docs/build/html/calzone.html
@@ -386,7 +386,7 @@ <h2>Submodules<a class="headerlink" href="#submodules" title="Link to this headi
 
 <dl class="py function">
 <dt class="sig sig-object py" id="calzone.metrics.hosmer_lemeshow_test">
-<span class="sig-prename descclassname"><span class="pre">calzone.metrics.</span></span><span class="sig-name descname"><span class="pre">hosmer_lemeshow_test</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">reliability</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">confidence</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bin_count</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">df</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/calzone/metrics.html#hosmer_lemeshow_test"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#calzone.metrics.hosmer_lemeshow_test" title="Link to this definition"></a></dt>
+<span class="sig-prename descclassname"><span class="pre">calzone.metrics.</span></span><span class="sig-name descname"><span class="pre">hosmer_lemeshow_test</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">reliability</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">confidence</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bin_count</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">df</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/calzone/metrics.html#hosmer_lemeshow_test"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#calzone.metrics.hosmer_lemeshow_test" title="Link to this definition"></a></dt>
 <dd><p>Compute the Hosmer-Lemeshow test for goodness of fit.</p>
 <p>This test is used to assess the calibration of binary classification models with full probability outputs.
 It compares observed and expected frequencies of events in groups of the data.</p>
diff --git a/docs/build/html/notebooks/hl_test.html b/docs/build/html/notebooks/hl_test.html
index 6f302f1..54ade3e 100644
--- a/docs/build/html/notebooks/hl_test.html
+++ b/docs/build/html/notebooks/hl_test.html
@@ -108,7 +108,7 @@ <h2>Theoretical Background<a class="headerlink" href="#Theoretical-Background" t
 <div class="math notranslate nohighlight">
 \[\text{HL} = \sum_{m=1}^{M} \left[\frac{(O_{1,m}-E_{1,m})^2}{E_{1,m}} + \frac{(O_{0,m}-E_{0,m})^2}{E_{0,m}}\right]  = \sum_{m=1}^{M} \frac{(O_{1,m}-E_{1,m})^2}{E_{1,m}(1-\frac{E_{1,m}}{N_m})} \sim \chi^2_{M-2}\]</div>
 <p>where <span class="math notranslate nohighlight">\(E_{1,m}\)</span> is the expected number of class 1 events in the <span class="math notranslate nohighlight">\(\text{m}^{th}\)</span> bin, <span class="math notranslate nohighlight">\(O_{1,m}\)</span> is the observed number of class 1 events in the <span class="math notranslate nohighlight">\(\text{m}^{th}\)</span> bin, <span class="math notranslate nohighlight">\(N_m\)</span> is the total number of observations in the <span class="math notranslate nohighlight">\(\text{m}^{th}\)</span> bin, and <span class="math notranslate nohighlight">\(M\)</span> is the number of bins. The HL test statistic is distributed as a chi-squared distribution with <span class="math notranslate nohighlight">\(M-2\)</span> degrees of freedom. We can then use this test statistic to calculate the p-value for the test and determine
-whether we can reject the null hypothesis that the model is well-calibrated. (Notice that the degree of freedom of HL test is <span class="math notranslate nohighlight">\(M-2\)</span> by default but some literature suggests that the degree of freedom should be <span class="math notranslate nohighlight">\(M\)</span> instead when the samples is not used for training. We could not find a proof to this statement and we provides the option to specify the degree of freedom in the <code class="docutils literal notranslate"><span class="pre">calzone</span></code>. The default value is still <span class="math notranslate nohighlight">\(M-2\)</span>.)</p>
+whether we can reject the null hypothesis that the model is well-calibrated. Note that the HL test uses <span class="math notranslate nohighlight">\(M-2\)</span> degrees of freedom by default, but some literature suggests that the degrees of freedom should be <span class="math notranslate nohighlight">\(M\)</span> instead when the samples were not used for training. We provide the option to specify the degrees of freedom in <code class="docutils literal notranslate"><span class="pre">calzone</span></code>. The default value is still <span class="math notranslate nohighlight">\(M-2\)</span>.</p>
 </section>
 <section id="Pros-of-HL-test">
 <h2>Pros of HL test<a class="headerlink" href="#Pros-of-HL-test" title="Link to this heading"></a></h2>
@@ -310,7 +310,53 @@ <h2>Size of HL test<a class="headerlink" href="#Size-of-HL-test" title="Link to
 <img alt="../_images/notebooks_hl_test_14_1.png" src="../_images/notebooks_hl_test_14_1.png" />
 </div>
 </div>
-<p>We can see that both the equal-width and the equal-count method have the incorrect size.</p>
+<p>We can see that both the equal-width and the equal-count methods have an incorrect size. The simulation supports the claim that the number of degrees of freedom should be M instead of M-2. We can show this by repeating the simulation with the degrees of freedom set to M. We do not prove the claim here since it is beyond the scope of this documentation.</p>
+<div class="nbinput docutils container">
+<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[6]:
+</pre></div>
+</div>
+<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="c1">### The size of HL Test</span>
+<span class="kn">from</span> <span class="nn">calzone.utils</span> <span class="kn">import</span> <span class="n">fake_binary_data_generator</span>
+<span class="kn">from</span> <span class="nn">importlib</span> <span class="kn">import</span> <span class="n">reload</span>
+<span class="kn">import</span> <span class="nn">calzone.metrics</span>
+<span class="n">reload</span><span class="p">(</span><span class="n">calzone</span><span class="o">.</span><span class="n">metrics</span><span class="p">)</span>
+<span class="kn">from</span> <span class="nn">calzone.metrics</span> <span class="kn">import</span> <span class="n">CalibrationMetrics</span>
+<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="mi">123</span><span class="p">)</span>
+<span class="n">fakedata_generator</span> <span class="o">=</span> <span class="n">fake_binary_data_generator</span><span class="p">(</span><span class="n">alpha_val</span><span class="o">=</span><span class="mf">0.5</span><span class="p">,</span> <span class="n">beta_val</span><span class="o">=</span><span class="mf">0.5</span><span class="p">)</span>
+<span class="n">cal_metrics</span> <span class="o">=</span> <span class="n">CalibrationMetrics</span><span class="p">()</span>
+<span class="n">sample_size</span> <span class="o">=</span> <span class="mi">1000</span>
+<span class="n">simulation_size</span> <span class="o">=</span> <span class="mi">10000</span>
+<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
+<span class="c1"># generate data</span>
+<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">simulation_size</span><span class="p">):</span>
+    <span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">fakedata_generator</span><span class="o">.</span><span class="n">generate_data</span><span class="p">(</span><span class="n">sample_size</span><span class="p">)</span>
+    <span class="k">if</span> <span class="n">i</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+        <span class="n">tempresult</span> <span class="o">=</span> <span class="n">cal_metrics</span><span class="o">.</span><span class="n">calculate_metrics</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="p">[</span><span class="s1">&#39;HL-H&#39;</span><span class="p">,</span> <span class="s1">&#39;HL-C&#39;</span><span class="p">],</span><span class="n">return_numpy</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">df</span> <span class="o">=</span> <span class="mi">10</span><span class="p">)</span>
+        <span class="n">keys</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">tempresult</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
+        <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="n">tempresult</span><span class="o">.</span><span class="n">values</span><span class="p">())))</span>
+    <span class="k">else</span><span class="p">:</span>
+        <span class="n">tempresult</span> <span class="o">=</span> <span class="n">cal_metrics</span><span class="o">.</span><span class="n">calculate_metrics</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="p">[</span><span class="s1">&#39;HL-H&#39;</span><span class="p">,</span> <span class="s1">&#39;HL-C&#39;</span><span class="p">],</span><span class="n">return_numpy</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">df</span> <span class="o">=</span> <span class="mi">10</span><span class="p">)</span>
+        <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">tempresult</span><span class="p">)</span>
+<span class="n">results</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">results</span><span class="p">)</span>
+
+<span class="n">hl_h_pvalue</span> <span class="o">=</span> <span class="n">results</span><span class="p">[:,</span><span class="mi">1</span><span class="p">]</span>
+<span class="n">hl_c_pvalue</span> <span class="o">=</span> <span class="n">results</span><span class="p">[:,</span><span class="mi">3</span><span class="p">]</span>
+<span class="n">size_h</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">hl_h_pvalue</span> <span class="o">&lt;</span> <span class="mf">0.05</span><span class="p">)</span>
+<span class="n">size_c</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">hl_c_pvalue</span> <span class="o">&lt;</span> <span class="mf">0.05</span><span class="p">)</span>
+<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;The size of HL-H with df=M is :&quot;</span><span class="p">,</span> <span class="nb">round</span><span class="p">(</span><span class="n">size_h</span><span class="p">,</span><span class="mi">3</span><span class="p">))</span>
+<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;The size of HL-C with df=M  is :&quot;</span><span class="p">,</span> <span class="nb">round</span><span class="p">(</span><span class="n">size_c</span><span class="p">,</span><span class="mi">3</span><span class="p">))</span>
+</pre></div>
+</div>
+</div>
+<div class="nboutput nblast docutils container">
+<div class="prompt empty docutils container">
+</div>
+<div class="output_area docutils container">
+<div class="highlight"><pre>
+The size of HL-H with df=M is : 0.047
+The size of HL-C with df=M  is : 0.055
+</pre></div></div>
+</div>
 </section>
 <section id="Reference">
 <h2>Reference<a class="headerlink" href="#Reference" title="Link to this heading"></a></h2>
diff --git a/docs/build/html/notebooks/hl_test.ipynb b/docs/build/html/notebooks/hl_test.ipynb
index ec53edd..41e8204 100644
--- a/docs/build/html/notebooks/hl_test.ipynb
+++ b/docs/build/html/notebooks/hl_test.ipynb
@@ -20,7 +20,7 @@
     "\\text{HL} = \\sum_{m=1}^{M} \\left[\\frac{(O_{1,m}-E_{1,m})^2}{E_{1,m}} + \\frac{(O_{0,m}-E_{0,m})^2}{E_{0,m}}\\right]  = \\sum_{m=1}^{M} \\frac{(O_{1,m}-E_{1,m})^2}{E_{1,m}(1-\\frac{E_{1,m}}{N_m})} \\sim \\chi^2_{M-2}\n",
     "$$\n",
     "\n",
-    "where $E_{1,m}$ is the expected number of class 1 events in the $\\text{m}^{th}$ bin, $O_{1,m}$ is the observed number of class 1 events in the $\\text{m}^{th}$ bin, $N_m$ is the total number of observations in the $\\text{m}^{th}$ bin, and $M$ is the number of bins. The HL test statistic is distributed as a chi-squared distribution with $M-2$ degrees of freedom. We can then use this test statistic to calculate the p-value for the test and determine whether we can reject the null hypothesis that the model is well-calibrated. (Notice that the degree of freedom of HL test is $M-2$ by default but some literature suggests that the degree of freedom should be $M$ instead when the samples is not used for training. We could not find a proof to this statement and we provides the option to specify the degree of freedom in the `calzone`. The default value is still $M-2$.)"
+    "where $E_{1,m}$ is the expected number of class 1 events in the $\\text{m}^{th}$ bin, $O_{1,m}$ is the observed number of class 1 events in the $\\text{m}^{th}$ bin, $N_m$ is the total number of observations in the $\\text{m}^{th}$ bin, and $M$ is the number of bins. The HL test statistic is distributed as a chi-squared distribution with $M-2$ degrees of freedom. We can then use this test statistic to calculate the p-value for the test and determine whether we can reject the null hypothesis that the model is well-calibrated. Note that the number of degrees of freedom of the HL test is $M-2$ by default, but some literature suggests that it should be $M$ instead when the samples were not used for training. We provide the option to specify the degrees of freedom in `calzone`. The default value is still $M-2$."
    ]
   },
   {
@@ -280,7 +280,54 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We can see that both the equal-width and the equal-count method have the incorrect size."
+    "We can see that both the equal-width and the equal-count methods have an incorrect size. The simulation supports the claim that the number of degrees of freedom should be M instead of M-2. We can show this by repeating the simulation with the degrees of freedom set to M. We do not prove the claim here since it is beyond the scope of this documentation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The size of HL-H with df=M is : 0.047\n",
+      "The size of HL-C with df=M  is : 0.055\n"
+     ]
+    }
+   ],
+   "source": [
+    "### The size of HL Test\n",
+    "from calzone.utils import fake_binary_data_generator\n",
+    "from importlib import reload\n",
+    "import calzone.metrics\n",
+    "reload(calzone.metrics)\n",
+    "from calzone.metrics import CalibrationMetrics\n",
+    "np.random.seed(123)\n",
+    "fakedata_generator = fake_binary_data_generator(alpha_val=0.5, beta_val=0.5)\n",
+    "cal_metrics = CalibrationMetrics()\n",
+    "sample_size = 1000\n",
+    "simulation_size = 10000\n",
+    "results = []\n",
+    "# generate data\n",
+    "for i in range(simulation_size):\n",
+    "    X, y = fakedata_generator.generate_data(sample_size)\n",
+    "    if i == 0:\n",
+    "        tempresult = cal_metrics.calculate_metrics(y, X, ['HL-H', 'HL-C'],return_numpy=False, df = 10)\n",
+    "        keys = list(tempresult.keys())\n",
+    "        results.append(np.array(list(tempresult.values())))\n",
+    "    else:\n",
+    "        tempresult = cal_metrics.calculate_metrics(y, X, ['HL-H', 'HL-C'],return_numpy=True, df = 10)\n",
+    "        results.append(tempresult)\n",
+    "results = np.array(results)\n",
+    "\n",
+    "hl_h_pvalue = results[:,1]\n",
+    "hl_c_pvalue = results[:,3]\n",
+    "size_h = np.mean(hl_h_pvalue < 0.05)\n",
+    "size_c = np.mean(hl_c_pvalue < 0.05)\n",
+    "print(\"The size of HL-H with df=M is :\", round(size_h,3))\n",
+    "print(\"The size of HL-C with df=M  is :\", round(size_c,3))"
    ]
   },
   {
diff --git a/docs/build/html/notebooks/quickstart.html b/docs/build/html/notebooks/quickstart.html
index f3adffa..d4c0377 100644
--- a/docs/build/html/notebooks/quickstart.html
+++ b/docs/build/html/notebooks/quickstart.html
@@ -191,7 +191,8 @@ <h2>Command line interface<a class="headerlink" href="#Command-line-interface" t
                       [--prevalence_adjustment] [--n_bootstrap N_BOOTSTRAP]
                       [--bootstrap_ci BOOTSTRAP_CI]
                       [--class_to_calculate CLASS_TO_CALCULATE]
-                      [--num_bins NUM_BINS] [--topclass]
+                      [--num_bins NUM_BINS]
+                      [--hl_test_validation HL_TEST_VALIDATION] [--topclass]
                       [--save_metrics SAVE_METRICS] [--plot]
                       [--plot_bins PLOT_BINS] [--save_plot SAVE_PLOT]
                       [--save_diagram_output SAVE_DIAGRAM_OUTPUT] [--verbose]
@@ -218,6 +219,9 @@ <h2>Command line interface<a class="headerlink" href="#Command-line-interface" t
                         Class to calculate metrics for (default: 1)
   --num_bins NUM_BINS   Number of bins for ECE/MCE/HL calculations (default:
                         10)
+  --hl_test_validation HL_TEST_VALIDATION
+                        Using nbin instead of nbin-2 as HL test DOF. Use it if
+                        the dataset is validation set.
   --topclass            Whether to transform the problem to top-class problem.
   --save_metrics SAVE_METRICS
                         Save the metrics to a csv file
@@ -225,7 +229,8 @@ <h2>Command line interface<a class="headerlink" href="#Command-line-interface" t
   --plot_bins PLOT_BINS
                         Number of bins for reliability diagram
   --save_plot SAVE_PLOT
-                        Save the plot to a file
+                        Save the plot to a file. Must end with valid image
+                        formats.
   --save_diagram_output SAVE_DIAGRAM_OUTPUT
                         Save the reliability diagram output to a file
   --verbose             Print verbose output
@@ -250,6 +255,7 @@ <h2>Command line interface<a class="headerlink" href="#Command-line-interface" t
 <span class="o">--</span><span class="n">save_diagram_output</span> <span class="s1">&#39;../../../example_data/simulated_welldata_diagram_output.csv&#39;</span>
 <span class="c1">### save_diagram_output only when you want to save the reliability diagram output</span>
 <span class="c1">#--prevalence_adjustment # only when you want to apply prevalence adjustment</span>
+<span class="c1">#--hl_test_validation #use it only when the data is from validation set</span>
 </pre></div>
 </div>
 </div>
diff --git a/docs/build/html/notebooks/quickstart.ipynb b/docs/build/html/notebooks/quickstart.ipynb
index 1e5b953..63d7bc1 100644
--- a/docs/build/html/notebooks/quickstart.ipynb
+++ b/docs/build/html/notebooks/quickstart.ipynb
@@ -184,7 +184,8 @@
       "                      [--prevalence_adjustment] [--n_bootstrap N_BOOTSTRAP]\n",
       "                      [--bootstrap_ci BOOTSTRAP_CI]\n",
       "                      [--class_to_calculate CLASS_TO_CALCULATE]\n",
-      "                      [--num_bins NUM_BINS] [--topclass]\n",
+      "                      [--num_bins NUM_BINS]\n",
+      "                      [--hl_test_validation HL_TEST_VALIDATION] [--topclass]\n",
       "                      [--save_metrics SAVE_METRICS] [--plot]\n",
       "                      [--plot_bins PLOT_BINS] [--save_plot SAVE_PLOT]\n",
       "                      [--save_diagram_output SAVE_DIAGRAM_OUTPUT] [--verbose]\n",
@@ -211,6 +212,9 @@
       "                        Class to calculate metrics for (default: 1)\n",
       "  --num_bins NUM_BINS   Number of bins for ECE/MCE/HL calculations (default:\n",
       "                        10)\n",
+      "  --hl_test_validation HL_TEST_VALIDATION\n",
+      "                        Using nbin instead of nbin-2 as HL test DOF. Use it if\n",
+      "                        the dataset is validation set.\n",
       "  --topclass            Whether to transform the problem to top-class problem.\n",
       "  --save_metrics SAVE_METRICS\n",
       "                        Save the metrics to a csv file\n",
@@ -218,7 +222,8 @@
       "  --plot_bins PLOT_BINS\n",
       "                        Number of bins for reliability diagram\n",
       "  --save_plot SAVE_PLOT\n",
-      "                        Save the plot to a file\n",
+      "                        Save the plot to a file. Must end with valid image\n",
+      "                        formats.\n",
       "  --save_diagram_output SAVE_DIAGRAM_OUTPUT\n",
       "                        Save the reliability diagram output to a file\n",
       "  --verbose             Print verbose output\n"
@@ -290,7 +295,8 @@
     "--verbose \\\n",
     "--save_diagram_output '../../../example_data/simulated_welldata_diagram_output.csv' \n",
     "### save_diagram_output only when you want to save the reliability diagram output\n",
-    "#--prevalence_adjustment # only when you want to apply prevalence adjustment"
+    "#--prevalence_adjustment # only when you want to apply prevalence adjustment\n",
+    "#--hl_test_validation #use it only when the data is from validation set"
    ]
   },
   {
diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js
index e13f7aa..11640b2 100644
--- a/docs/build/html/searchindex.js
+++ b/docs/build/html/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"alltitles": {"COX calibration analysis": [[4, null]], "Calculating Cox slope and intercept with calzone": [[4, "Calculating-Cox-slope-and-intercept-with-calzone"]], "Calculating ECE and MCE with calzone": [[5, "Calculating-ECE-and-MCE-with-calzone"]], "Calculating HL test statistics and p-value with calzone": [[6, "Calculating-HL-test-statistics-and-p-value-with-calzone"]], "Calculating LOESS ICI and COX ICI using calzone": [[7, "Calculating-LOESS-ICI-and-COX-ICI-using-calzone"]], "Calculating the Spieegelhalter Z score and p-value using calzone": [[12, "Calculating-the-Spieegelhalter-Z-score-and-p-value-using-calzone"]], "Command line interface": [[10, "Command-line-interface"]], "Cons of Cox calibration analysis": [[4, "Cons-of-Cox-calibration-analysis"]], "Cons of ECE and MCE": [[5, "Cons-of-ECE-and-MCE"]], "Cons of HL Test": [[6, "Cons-of-HL-Test"]], "Cons of ICI": [[7, "Cons-of-ICI"]], "Cons of Spiegelhalter\u2019s Z test": [[12, "Cons-of-Spiegelhalter's-Z-test"]], "Contents:": [[1, null]], "ECE and MCE as function of bin size": [[5, "ECE-and-MCE-as-function-of-bin-size"]], "Estimated ECE and MCE": [[5, "Estimated-ECE-and-MCE"]], "Exepected Calibration Error(ECE) and Maximum Calibration Error (MCE)": [[5, null]], "Guide to calzone and calibration metrics": [[8, "Guide-to-calzone-and-calibration-metrics"]], "Hosmer-Lemeshow test (HL test)": [[6, null]], "Installation": [[10, "Installation"]], "Integrated Calibration Index (ICI)": [[7, null]], "Module contents": [[0, "module-calzone"]], "Multiclass extension": [[14, null]], "Preform prevalence adjustment in calzone": [[9, "Preform-prevalence-adjustment-in-calzone"]], "Prevalence adjustment": [[9, null]], "Prevalence adjustment and constant shift in logit of class-of-interest": [[9, "Prevalence-adjustment-and-constant-shift-in-logit-of-class-of-interest"]], "Pros of Cox calibration analysis": [[4, "Pros-of-Cox-calibration-analysis"]], "Pros of ECE and MCE": [[5, "Pros-of-ECE-and-MCE"]], "Pros 
of HL test": [[6, "Pros-of-HL-test"]], "Pros of ICI": [[7, "Pros-of-ICI"]], "Pros of Spiegelhalter\u2019s Z test": [[12, "Pros-of-Spiegelhalter's-Z-test"]], "Quick Start": [[10, null]], "Reference": [[5, "Reference"], [6, "Reference"], [7, "Reference"], [12, "Reference"]], "References": [[4, "References"], [9, "References"], [11, "References"]], "Reliability diagram": [[11, null]], "Running GUI": [[3, null]], "Size of COX slope and intecept test": [[4, "Size-of-COX-slope-and-intecept-test"]], "Size of HL test": [[6, "Size-of-HL-test"]], "Spiegelhalter\u2019s Z-test": [[12, null]], "Subgroup analysis": [[13, null]], "Submodules": [[0, "submodules"]], "Summary and guide for calzone": [[8, null]], "Testing the size of Spiegelhalter\u2019s z test": [[12, "Testing-the-size-of-Spiegelhalter's-z-test"]], "Theoretical Background": [[4, "Theoretical-Background"], [5, "Theoretical-Background"], [6, "Theoretical-Background"], [7, "Theoretical-Background"]], "Theoretical background": [[12, "Theoretical-background"]], "Using calzone in python": [[10, "Using-calzone-in-python"]], "Visualization of the fitted curve": [[7, "Visualization-of-the-fitted-curve"]], "Welcome to the documentation for calzone": [[1, null]], "calzone": [[2, null]], "calzone package": [[0, null]], "calzone.metrics module": [[0, "module-calzone.metrics"]], "calzone.utils module": [[0, "module-calzone.utils"]], "calzone.vis module": [[0, "module-calzone.vis"]]}, "docnames": ["calzone", "index", "modules", "notebooks/GUI", "notebooks/cox", "notebooks/ece_mce", "notebooks/hl_test", "notebooks/ici", "notebooks/metrics_summary", "notebooks/prevalence_adjustment", "notebooks/quickstart", "notebooks/reliability_diagram", "notebooks/spiegelhalter_z", "notebooks/subgroup", "notebooks/topclass"], "envversion": {"nbsphinx": 4, "sphinx": 63, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, 
"sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1}, "filenames": ["calzone.rst", "index.rst", "modules.rst", "notebooks/GUI.ipynb", "notebooks/cox.ipynb", "notebooks/ece_mce.ipynb", "notebooks/hl_test.ipynb", "notebooks/ici.ipynb", "notebooks/metrics_summary.ipynb", "notebooks/prevalence_adjustment.ipynb", "notebooks/quickstart.ipynb", "notebooks/reliability_diagram.ipynb", "notebooks/spiegelhalter_z.ipynb", "notebooks/subgroup.ipynb", "notebooks/topclass.ipynb"], "indexentries": {"__init__() (calzone.metrics.calibrationmetrics method)": [[0, "calzone.metrics.CalibrationMetrics.__init__", false]], "__init__() (calzone.utils.data_loader method)": [[0, "calzone.utils.data_loader.__init__", false], [0, "id0", false]], "__init__() (calzone.utils.fake_binary_data_generator method)": [[0, "calzone.utils.fake_binary_data_generator.__init__", false]], "abraitary_miscal() (calzone.utils.fake_binary_data_generator method)": [[0, "calzone.utils.fake_binary_data_generator.abraitary_miscal", false]], "alpha_val (calzone.utils.fake_binary_data_generator attribute)": [[0, "calzone.utils.fake_binary_data_generator.alpha_val", false]], "apply_prevalence_adjustment() (in module calzone.utils)": [[0, "calzone.utils.apply_prevalence_adjustment", false]], "beta_val (calzone.utils.fake_binary_data_generator attribute)": [[0, "calzone.utils.fake_binary_data_generator.beta_val", false]], "bootstrap() (calzone.metrics.calibrationmetrics method)": [[0, "calzone.metrics.CalibrationMetrics.bootstrap", false]], "cal_ici() (in module calzone.metrics)": [[0, "calzone.metrics.cal_ICI", false]], "cal_ici_cox() (in module calzone.metrics)": [[0, "calzone.metrics.cal_ICI_cox", false]], "calculate_ece_mce() (in module calzone.metrics)": [[0, "calzone.metrics.calculate_ece_mce", false]], "calculate_metrics() (calzone.metrics.calibrationmetrics method)": [[0, 
"calzone.metrics.CalibrationMetrics.calculate_metrics", false]], "calibrationmetrics (class in calzone.metrics)": [[0, "calzone.metrics.CalibrationMetrics", false]], "calzone": [[0, "module-calzone", false]], "calzone.metrics": [[0, "module-calzone.metrics", false]], "calzone.utils": [[0, "module-calzone.utils", false]], "calzone.vis": [[0, "module-calzone.vis", false]], "cox_regression_analysis() (in module calzone.metrics)": [[0, "calzone.metrics.cox_regression_analysis", false]], "data (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.data", false]], "data_loader (class in calzone.utils)": [[0, "calzone.utils.data_loader", false]], "data_path (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.data_path", false]], "fake_binary_data_generator (class in calzone.utils)": [[0, "calzone.utils.fake_binary_data_generator", false]], "find_optimal_prevalence() (in module calzone.utils)": [[0, "calzone.utils.find_optimal_prevalence", false]], "generate_data() (calzone.utils.fake_binary_data_generator method)": [[0, "calzone.utils.fake_binary_data_generator.generate_data", false]], "get_ci() (in module calzone.metrics)": [[0, "calzone.metrics.get_CI", false]], "have_subgroup (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.have_subgroup", false]], "header (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.Header", false]], "hosmer_lemeshow_test() (in module calzone.metrics)": [[0, "calzone.metrics.hosmer_lemeshow_test", false]], "labels (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.labels", false]], "linear_miscal() (calzone.utils.fake_binary_data_generator method)": [[0, "calzone.utils.fake_binary_data_generator.linear_miscal", false]], "logit_func() (in module calzone.metrics)": [[0, "calzone.metrics.logit_func", false]], "loss() (in module calzone.utils)": [[0, "calzone.utils.loss", false]], "lowess_regression_analysis() (in module calzone.metrics)": [[0, 
"calzone.metrics.lowess_regression_analysis", false]], "make_roc_curve() (in module calzone.utils)": [[0, "calzone.utils.make_roc_curve", false]], "module": [[0, "module-calzone", false], [0, "module-calzone.metrics", false], [0, "module-calzone.utils", false], [0, "module-calzone.vis", false]], "optimal_prevalence_adjustment() (calzone.metrics.calibrationmetrics method)": [[0, "calzone.metrics.CalibrationMetrics.optimal_prevalence_adjustment", false]], "plot_reliability_diagram() (in module calzone.vis)": [[0, "calzone.vis.plot_reliability_diagram", false]], "plot_roc_curve() (in module calzone.vis)": [[0, "calzone.vis.plot_roc_curve", false]], "probs (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.probs", false]], "reliability_diagram() (in module calzone.utils)": [[0, "calzone.utils.reliability_diagram", false]], "removing_nan() (in module calzone.utils)": [[0, "calzone.utils.removing_nan", false]], "softmax_to_logits() (in module calzone.utils)": [[0, "calzone.utils.softmax_to_logits", false]], "spiegelhalter_z_test() (in module calzone.metrics)": [[0, "calzone.metrics.spiegelhalter_z_test", false]], "subgroup_indices (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.subgroup_indices", false]], "subgroups (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.subgroups", false]], "subgroups_class (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.subgroups_class", false]], "subgroups_index (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.subgroups_index", false]], "transform_topclass() (calzone.utils.data_loader method)": [[0, "calzone.utils.data_loader.transform_topclass", false], [0, "id1", false]]}, "objects": {"": [[0, 0, 0, "-", "calzone"]], "calzone": [[0, 0, 0, "-", "metrics"], [0, 0, 0, "-", "utils"], [0, 0, 0, "-", "vis"]], "calzone.metrics": [[0, 1, 1, "", "CalibrationMetrics"], [0, 3, 1, "", "cal_ICI"], [0, 3, 1, "", "cal_ICI_cox"], [0, 3, 1, 
"", "calculate_ece_mce"], [0, 3, 1, "", "cox_regression_analysis"], [0, 3, 1, "", "get_CI"], [0, 3, 1, "", "hosmer_lemeshow_test"], [0, 3, 1, "", "logit_func"], [0, 3, 1, "", "lowess_regression_analysis"], [0, 3, 1, "", "spiegelhalter_z_test"]], "calzone.metrics.CalibrationMetrics": [[0, 2, 1, "", "__init__"], [0, 2, 1, "", "bootstrap"], [0, 2, 1, "", "calculate_metrics"], [0, 2, 1, "", "optimal_prevalence_adjustment"]], "calzone.utils": [[0, 3, 1, "", "apply_prevalence_adjustment"], [0, 1, 1, "", "data_loader"], [0, 1, 1, "", "fake_binary_data_generator"], [0, 3, 1, "", "find_optimal_prevalence"], [0, 3, 1, "", "loss"], [0, 3, 1, "", "make_roc_curve"], [0, 3, 1, "", "reliability_diagram"], [0, 3, 1, "", "removing_nan"], [0, 3, 1, "", "softmax_to_logits"]], "calzone.utils.data_loader": [[0, 4, 1, "", "Header"], [0, 2, 1, "id0", "__init__"], [0, 4, 1, "", "data"], [0, 4, 1, "", "data_path"], [0, 4, 1, "", "have_subgroup"], [0, 4, 1, "", "labels"], [0, 4, 1, "", "probs"], [0, 4, 1, "", "subgroup_indices"], [0, 4, 1, "", "subgroups"], [0, 4, 1, "", "subgroups_class"], [0, 4, 1, "", "subgroups_index"], [0, 2, 1, "id1", "transform_topclass"]], "calzone.utils.fake_binary_data_generator": [[0, 2, 1, "", "__init__"], [0, 2, 1, "", "abraitary_miscal"], [0, 4, 1, "", "alpha_val"], [0, 4, 1, "", "beta_val"], [0, 2, 1, "", "generate_data"], [0, 2, 1, "", "linear_miscal"]], "calzone.vis": [[0, 3, 1, "", "plot_reliability_diagram"], [0, 3, 1, "", "plot_roc_curve"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "function", "Python function"], "4": ["py", "attribute", "Python attribute"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:function", "4": "py:attribute"}, "terms": {"": [0, 1, 4, 5, 6, 7, 8, 9, 10, 11], "0": [0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], "000": [4, 10, 13], "00037947714112375366": 13, "001": [0, 7, 10], "0012819521292332472": 14, 
"002": 10, "003": 10, "0038637011857438034": 14, "004": 10, "0046758796391944": 9, "005": 10, "00558856942568957": [7, 10, 13], "005610391483826338": [4, 7, 10, 13], "006": [10, 13], "007": 10, "007508966220374058": 9, "008": 10, "008733966945443138": [5, 10, 13], "008745511902314453": 9, "009": [10, 13], "009313116424641145": 14, "009458033653818828": [5, 10, 13, 14], "009458033653818974": 14, "009608653731328977": [5, 10, 13, 14], "009608653731329372": 14, "01": [10, 13], "010355911839501922": 9, "011": 10, "012": [10, 13], "01208775955804901": [5, 10, 13], "013671230516636386": 9, "014081013182402267": 9, "016": 10, "017": 10, "018": 10, "019": 10, "02": [4, 10, 13], "020": 10, "020515047600205394": 14, "020515047600205505": [5, 10, 13, 14], "021": [10, 13], "022": [9, 10], "023": [10, 13], "02324031223486256": [5, 10, 13], "025": [4, 10], "028": 10, "029": 4, "029403495083063648": 9, "029652031234071": 14, "03": 10, "031": 10, "032": 10, "03353272525376893": [4, 7, 10, 13], "034": [4, 10, 13], "035": 10, "036": 10, "037": [10, 13], "038": [10, 13], "039": [10, 13], "039000000000000035": 4, "03926468843081932": 14, "03926468843081976": [5, 10, 13, 14], "04": 9, "040": 4, "041": 10, "042": [10, 13], "043": 10, "044": 10, "04497652296600376": [4, 7, 10, 13], "045": [10, 13], "0450": 4, "04599": 5, "046": 10, "048": [10, 13], "04848338618970194": [5, 10, 13], "049": [10, 12, 13], "05": [0, 6, 12], "050": 10, "051": [4, 10, 13], "0512096313774626": [4, 7, 10, 13], "052": 10, "054": 10, "055": [10, 13], "056": 10, "05600000000000005": 4, "058": 10, "062": 10, "063": [10, 13], "064": [10, 13], "065": [10, 13], "066": 10, "068": 10, "06894458205508802": 9, "069": 10, "06991428582761099": 13, "07": [0, 10, 13], "071": 10, "072": 10, "073": 10, "07356445428053172": 7, "074": [10, 13], "07429481165606829": 13, "07479369479609524": 13, "075": [10, 13], "076": 10, "0765892898243467": 13, "07658928982434714": 13, "07692218401743334": 7, "077": [10, 13], "07746407648179383": 
13, "07796623141885761": 13, "078": [10, 13], "07872518570110512": 13, "079": [10, 13], "07961758926734244": 9, "08": 10, "081": 10, "083": 10, "0830845954424": 14, "084": 10, "0841517729106883": 9, "0841517733462589": 9, "085": 10, "086": 10, "087": 10, "0884212323912401": 14, "09": 10, "091": 10, "092": 10, "0962280216661371": 9, "0984122810555748": 7, "0x7fe048e8b470": 5, "1": [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], "10": [0, 4, 5, 6, 7, 9, 10, 11, 12, 13], "1000": [0, 4, 6, 10, 12], "10000": [5, 6, 12], "1002": [4, 7, 12], "101": 9, "1016": 9, "1027": 10, "1043": 6, "1069": 6, "107": 10, "108": 10, "109": 10, "1093": [4, 7, 9], "11": 9, "11017646753618837": 14, "111": 10, "1135847825485915": 14, "1138": [10, 13], "114": 10, "115": 10, "1156": 10, "116": [6, 10], "117": 6, "1175": 11, "1177": 9, "11820": 9, "12": 9, "123": [4, 6, 9, 10, 12, 13], "12348577118577644": [4, 7, 10, 13], "124": 10, "125": 10, "126": 10, "127": 10, "12775157222121533": 9, "1292": 10, "13": [10, 11], "1322110153978738": 14, "1327565894838103": 13, "133": [10, 13], "135": [10, 12], "1355": 10, "139": 10, "1393": 10, "1394": 9, "14": 11, "140": [10, 13], "14045600565696226": 13, "1409": 9, "14090872416947742": 13, "141": [10, 13], "1446": 9, "145": 10, "1457": 9, "15": [4, 7, 9, 10, 11, 14], "150": 0, "151": 10, "1512": 12, "1519": 12, "152": 10, "155": 10, "158": 10, "16": 6, "1609": 5, "16250572519432438": 13, "163": [10, 13], "168": 10, "1706": 5, "171": [10, 13], "1714434139853": 13, "175": 10, "18": [10, 13], "182": 10, "1844": 10, "185": 10, "1857": 10, "1927": 4, "1928": 4, "1958": 4, "1979": 7, "1980": 6, "1986": 12, "1997": 6, "1e": [0, 7], "1st": 4, "2": [0, 4, 5, 6, 7, 9, 10, 11, 12, 13], "20": [6, 12], "2007": 11, "2008": [9, 12], "2009": 12, "2010": 9, "2011": 9, "2015": 5, "2017": 5, "2018": [4, 9], "2019": [5, 7], "202": 10, "2020": [4, 5, 7, 9], "2022": 5, "2024": 4, "203": 10, "204": 10, "205": 10, "21": [7, 10], "210": 4, "212": 4, "214": 10, "22": 11, "2246": [10, 
13], "232": 10, "24": 10, "244": 10, "249": 10, "26": 10, "262": 4, "27": [4, 7, 9, 10, 13], "273": 10, "28": 10, "29": [5, 10], "2_": 6, "2p_i": 12, "3": [0, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14], "300": 5, "31": 10, "319": 10, "327": [10, 13], "33": 10, "3300531914893617": 9, "3341": 10, "335": 10, "34": [4, 10, 12], "3465": 4, "352": [10, 13], "35209071874348785": [6, 10, 13], "368": 7, "37": 10, "376": [10, 13], "37632691618773545": 14, "3763269161877356": [10, 12, 13], "38": 7, "385628": 4, "39": [4, 5, 6, 7, 10, 12, 14], "3900": 10, "391": 10, "392": 10, "396": 10, "4": [0, 4, 5, 6, 7, 9, 10, 12, 14], "40": 11, "4051": 7, "4065": 7, "42": 11, "429": [10, 13], "43": 11, "4385762101924": 13, "439": [10, 13], "443": 10, "4436": 4, "4438": 4, "454": 10, "456": 12, "463": 10, "469": 10, "470": 10, "478": [10, 13], "47838663128188996": 13, "481": 10, "494": 10, "495": 10, "497": 10, "49863799264980607": 9, "4998": 4, "4999": 4, "5": [0, 4, 5, 6, 7, 9, 10, 11, 12, 14], "50": 11, "5000": [4, 9, 10], "507": [10, 13], "5071793536874274": 13, "508": 10, "524": 10, "526": 10, "53": 4, "5359720760929648": 13, "536": [10, 13], "539": 10, "554": 10, "569": 10, "577": 10, "581": 10, "584": 10, "5958290924064796": 9, "6": [0, 4, 5, 6, 10, 14], "611": 10, "621": [4, 7], "626": 10, "631": 10, "633": [4, 7], "637": 10, "638": 10, "641": [10, 13], "643": 12, "651": 11, "661": 11, "664": 10, "666": 10, "668": [10, 13], "67": 10, "681": 10, "686": 10, "6897839569176842": 9, "694947603203135": [6, 10, 13], "695": [10, 13], "696": [10, 13], "698": 10, "7": [0, 4, 6, 10], "7066738713391099": [10, 12, 13], "7066738713391101": 14, "707": [10, 13], "716": 10, "727": 10, "729": 10, "732": [10, 13], "74": 7, "7486601568004448": 14, "778": 10, "779": 10, "781": 10, "7837388214288888": 9, "794": 10, "8": [0, 5, 6, 10, 13], "813": 10, "818": 10, "8281": 7, "829": 7, "836": 7, "842": [10, 13], "87": 9, "8754203499121678": 9, "8754203499121679": 9, "8835446575708198": [6, 10, 13], "884": [10, 13], 
"884991559088098": [6, 10, 13], "885": [10, 13], "886": 10, "888": 10, "9": [0, 6, 10], "9097229915817346": 14, "910": [10, 13], "93575342117766": 13, "936": [10, 13], "937": [4, 10, 13], "9372902801721911": [4, 7, 10, 13], "940": 10, "9400481147756811": 9, "942": 10, "95": [10, 11], "96": 0, "9602": 5, "971": 10, "975": 4, "977": 10, "9781118445112": 4, "981": 10, "990": 10, "9939": 4, "994": [10, 13], "9942": 4, "9942499557748269": [4, 7, 10, 13], "996": 10, "9990721119864874": 14, "A": [0, 4, 6, 7, 9, 11, 13], "And": 9, "But": 11, "By": 9, "FOR": 6, "For": [0, 4, 8, 9, 10, 12], "If": [0, 4, 8, 10], "In": [4, 5, 6, 7, 9, 10, 12, 13, 14], "It": [0, 1, 5, 6, 7, 10, 11, 12], "No": [4, 5, 9], "OF": 6, "On": 5, "One": 6, "Or": 10, "THE": 6, "That": 4, "The": [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12], "Then": [3, 10], "There": [4, 5, 6, 10], "These": 0, "To": [1, 3, 4, 7, 8, 9, 10, 14], "_": [5, 6, 12, 14], "__init__": [0, 2], "aaai": 5, "ab": [5, 9], "about": [5, 6, 11, 14], "abov": [5, 9, 10], "abraitary_misc": [0, 2], "absenc": 0, "absolut": 0, "ac": 5, "academ": 9, "acc": 5, "access": 8, "accordingli": 0, "account": 5, "accur": [1, 8, 12], "accuraci": [0, 5, 7, 11], "achiev": 8, "acra": 9, "across": 0, "actual": [0, 7], "adapt": [5, 9], "add": [0, 9], "addint": 9, "addit": 0, "adjust": [0, 1, 4, 8, 10, 11], "adjusted_p": 9, "adjusted_preval": 0, "adjusted_prob": 0, "adjusted_proba": 0, "adopt": 5, "advantag": [4, 6, 7], "affect": [5, 6, 7, 9], "after": [8, 9, 12], "after_preval": 9, "again": [6, 8], "against": [0, 11], "aim": [5, 8], "aka": [4, 9], "al": [5, 6], "alern": 13, "all": [0, 3, 4, 5, 8, 10, 13, 14], "allow": 0, "alpha": [0, 6, 12], "alpha_v": [0, 2, 4, 6, 9, 10, 12], "alreadi": 10, "also": [4, 5, 6, 7, 9, 10, 12, 13], "altern": [4, 5, 7, 10], "alternativli": 14, "although": 4, "alwai": 5, "american": [4, 7], "an": [0, 4, 5, 6, 7, 9, 10, 13], "analysi": [0, 1], "ani": [0, 6, 7, 12], "append": [4, 6, 12], "appli": [0, 8, 9, 10], "applic": [0, 4, 9, 13], 
"apply_prevalence_adjust": [0, 2, 9], "approach": 14, "approxim": 12, "ar": [0, 3, 4, 5, 6, 10, 13], "arang": 5, "arbitrari": [0, 7], "area": 0, "argument": [0, 9, 10], "around": 4, "arrai": [0, 4, 6, 12], "arrieta": 5, "artifici": [5, 11], "arxiv": [5, 9], "assess": [0, 1, 4, 6, 8], "associ": [0, 4, 7], "assum": [0, 9, 10, 11], "assumpt": 6, "auc": 0, "austin": 7, "automat": 10, "avail": 0, "averag": [0, 5, 7, 8, 11], "avoid": 0, "b": [4, 9, 12, 13], "b_m": 5, "back": 0, "background": 1, "bad": 11, "bar": [0, 11], "base": [0, 5, 6], "basic": 10, "batch": 1, "bay": 9, "bayesian": 5, "becaus": 5, "becom": [9, 14], "been": [4, 6], "befor": [1, 9], "before_preval": 9, "being": 9, "below": [4, 5, 11], "bernoulli": 11, "beta": [0, 10], "beta_v": [0, 2, 4, 6, 9, 10, 12], "better": 0, "between": [0, 4, 5, 9], "beyond": 4, "big": 4, "biggest": [5, 6], "bin": [0, 1, 4, 6, 7, 8, 10, 11, 12], "bin_count": [0, 5, 6, 7, 11, 14], "bin_counts_0": 11, "bin_counts_1": 11, "bin_edg": [0, 5, 6, 7, 11, 14], "binari": [0, 1, 4, 8, 9, 10, 11, 14], "binomi": 10, "biostatist": 9, "blue": 11, "bool": [0, 9, 11], "boostrap": 10, "bootstrap": [0, 1, 2, 10], "bootstrap_ci": 10, "both": [0, 4, 5, 6, 7, 9], "bound": 0, "breast": 9, "brier": 12, "brocker": 12, "br\u00f6cker": [11, 12], "c": [0, 5, 6, 7, 9, 10, 13, 14], "cal_ici": [0, 2], "cal_ici_cox": [0, 2, 7], "cal_metr": [4, 6, 10, 12, 13, 14], "calcualt": 8, "calcuat": 11, "calcul": [0, 1, 8, 9, 10, 11], "calculate_ece_mc": [0, 2, 5], "calculate_metr": [0, 2, 4, 5, 6, 7, 9, 10, 12, 13, 14], "calib": 5, "calibbr": 11, "calibr": [0, 1, 6, 9, 10, 11, 12, 13, 14], "calibrationmetr": [0, 2, 4, 5, 6, 7, 9, 10, 12, 13, 14], "call": [4, 5, 6, 7, 12], "callabl": 0, "calmetr": [4, 5, 6, 7, 9, 12], "calster": 4, "calzon": [3, 11, 13, 14], "can": [0, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14], "cancer": 9, "cannot": 6, "cao": 1, "capabl": 1, "captur": [4, 7], "care": 14, "carlo": 0, "case": [4, 7, 9, 10, 12], "caus": 5, "caution": 8, "cd": 10, "cdot": [4, 9], 
"certain": 4, "cessi": 6, "chanc": 4, "chang": [4, 5], "characterist": 0, "chart": 0, "check": [0, 6, 12], "chen": 9, "chi": [0, 6], "chi_squar": 0, "choic": [7, 9, 11], "choos": 4, "ci": 10, "class": [0, 1, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14], "class_1_index": [9, 11], "class_1_sampl": [9, 11], "class_to_calcul": [0, 4, 5, 6, 7, 9, 10, 12, 13, 14], "class_to_plot": [0, 5, 6, 7, 11, 14], "classif": [0, 1, 8, 9, 10, 11, 14], "classifi": 9, "clasto": 11, "clean": 0, "cleveland": 7, "cli": 10, "clinic": [4, 7, 12], "clip": [0, 7], "clone": 10, "close": [0, 4, 7], "closer": 4, "coef": [0, 4, 7, 9, 10, 13, 14], "coef_ci": 0, "coeffici": 0, "color": [0, 6, 7, 12], "column": [0, 10, 13], "com": 10, "comma": 10, "command": [1, 3, 13], "common": [4, 7, 14], "commun": 6, "compar": [0, 6, 12], "comparison": 6, "compon": 9, "comprehens": [1, 8], "comput": [0, 4, 5, 7], "con": [1, 8], "conda": 10, "conf": 5, "confer": 5, "confid": [0, 1, 4, 5, 10, 11], "confind": [0, 5, 6, 7, 11, 14], "confindence_0": 11, "confindence_1": 11, "conjunct": 0, "connect": 0, "consid": [8, 14], "const": 4, "constant": 1, "constraint": 4, "contain": [0, 8, 13], "content": 2, "continu": 9, "control": 4, "convent": 5, "convention": 6, "converg": 4, "convert": [0, 14], "cooper": 5, "correct": [0, 7, 9, 11, 12, 14], "correspond": [0, 8, 10], "could": [5, 6, 7, 11], "count": [0, 5, 6], "covari": 4, "coverag": [1, 8], "cox": [0, 1, 8, 9, 10, 13, 14], "cox_func": 7, "cox_ici": 7, "cox_intercept": [4, 7], "cox_intercept_ci": [4, 7], "cox_intercept_lowerci": 4, "cox_intercept_upperci": 4, "cox_p_correct": 7, "cox_regression_analysi": [0, 2, 4, 7], "cox_slop": [4, 7], "cox_slope_ci": [4, 7], "cox_slope_lowerci": 4, "cox_slope_upperci": 4, "creat": [0, 11, 13], "credit": 1, "cross": [9, 12], "csv": [0, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14], "csv_file": [10, 13], "cumul": 7, "current": [3, 4], "curv": [0, 1], "custom": 0, "custom_color": [0, 11], "d": [4, 5, 6, 9, 12], "data": [0, 2, 4, 5, 6, 7, 9, 10, 11, 12, 13, 
14], "data_load": [0, 2, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14], "data_path": [0, 2, 4, 5, 6, 7, 11, 12], "dataset": [1, 8, 9, 10, 13, 14], "date": 4, "david": 12, "decil": 6, "decompos": 12, "decomposit": 12, "deep": 5, "default": [0, 6, 10], "defin": [5, 12], "degre": [0, 6], "delta": [0, 7], "demonstr": [5, 9, 11], "demostr": [4, 6, 7], "denomin": 9, "densiti": [0, 6, 12], "dep": 4, "depend": [0, 4, 5, 6, 8, 9, 10], "deriv": [9, 10], "describ": 7, "descript": 8, "detail": [0, 4, 8, 13], "detect": 10, "determin": [0, 6, 8], "develop": [3, 10], "deviat": [5, 7, 8], "df": [0, 4, 6], "df_": [5, 7], "diagnosi": 9, "diagnost": 9, "diagram": [0, 1, 5, 6, 7, 8, 10, 13, 14], "dict": 0, "dictionari": 0, "didsr": 10, "differ": [0, 4, 5, 6, 7, 9, 14], "directli": [5, 7, 9, 10, 12], "directori": [3, 10], "disadvantag": [5, 7], "discrimin": 12, "discuss": [9, 11], "diseas": 9, "displai": 0, "distanc": 0, "distribut": [0, 5, 6, 7, 9, 10, 12], "diverg": [9, 12], "divid": [0, 6], "do": [4, 5, 13], "document": 8, "doe": [6, 12], "doesn": [4, 7, 9, 12], "doi": [4, 5, 7, 9, 11, 12], "don": 10, "done": [5, 9], "dot": 0, "dpi": 0, "drop": [5, 9, 11], "drop_indic": [9, 11], "dtype": [9, 11], "due": 8, "dure": 13, "dusenberri": 5, "e": [1, 4, 5, 7, 9, 10, 12], "e_": 6, "each": [0, 6, 8, 10, 11, 13, 14], "easi": [5, 6], "easili": 7, "ec": [0, 1, 4, 7, 8, 9, 10, 13, 14], "ece_": 8, "ece_c": 5, "ece_c_classon": 5, "ece_c_top_class": 5, "ece_equal_width": 5, "ece_h": 5, "ece_h_classon": 5, "ece_h_top_class": 5, "ed": 4, "edg": 0, "effect": 5, "eg": 13, "els": [4, 6, 12], "empir": [0, 7], "encod": 0, "entropi": [9, 12], "enumer": 13, "epsilon": 0, "equal": [0, 4, 5, 6, 11, 14], "equat": 9, "equival": [5, 9], "err": 4, "error": [0, 1, 7, 8, 11, 12], "error_bar": [0, 11, 14], "essenti": [1, 7, 8], "estim": [0, 1, 4, 7, 8, 9], "et": [5, 6], "eta": 9, "etc": [1, 13], "evalu": 0, "evenli": 5, "event": [0, 6], "evid": 12, "exact": [4, 11], "examin": [4, 6, 8], "exampl": [0, 4, 7, 10, 11, 12, 13], 
"example_data": [4, 5, 6, 7, 10, 11, 12, 13, 14], "exceed": 4, "except": [8, 14], "exepect": 1, "exit": 10, "exp": 0, "expand": 9, "expect": [0, 5, 6, 7, 8], "experi": 5, "experienc": 10, "explan": 8, "explicitli": [5, 6], "extens": [1, 5], "extract": 0, "extrem": 12, "f": [0, 4, 7, 9, 12, 13], "f_": 7, "fact": [9, 12], "factor": [0, 9], "fake": [0, 6], "fake_binary_data_gener": [0, 2, 4, 6, 9, 12], "fakedata_gener": [4, 6, 9, 12], "fals": [0, 4, 5, 6, 7, 9, 10, 11, 12], "fan": 1, "featur": [1, 3], "field": [0, 13], "fig": 7, "figur": 0, "file": [0, 10, 13], "find": [0, 1, 6, 9], "find_optimal_preval": [0, 2, 9], "first": [0, 1, 4, 5, 6, 9, 10, 12], "fit": [0, 1, 4, 6], "fix": [0, 4, 5], "fix_intercept": [0, 4], "fix_slop": [0, 4], "flag": [0, 10], "float": 0, "follow": [0, 3, 5, 6, 9, 10, 11, 12, 13], "foral": 9, "forecast": 11, "format": [0, 10, 13], "formula": 7, "fpr": 0, "frac": [4, 5, 6, 7, 9, 12], "fraction": 0, "freedom": [0, 6], "frequenc": [0, 6], "frequent": 0, "from": [0, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14], "full": [0, 10], "func": 0, "function": [0, 1, 4, 6, 7, 9, 10, 11, 12, 14], "further": [4, 9], "g": [1, 5], "gabriel": [4, 7], "gener": [0, 4, 6, 8, 9, 10, 12], "generate_data": [0, 2, 4, 6, 9, 12], "generate_miscal_data": 10, "generate_subgroup_data": 10, "generate_wellcal_data": 10, "get_ci": [0, 2], "giger": 9, "git": 10, "github": 10, "give": [4, 5, 7], "given": [0, 8, 9, 11, 13], "glaser": 9, "go": 11, "goe": 5, "good": [0, 5, 6, 7, 11], "granular": 0, "graphic": 0, "greater": 6, "greatli": 7, "grid": 7, "group": [0, 5, 6, 7, 11, 13], "gt": [4, 5], "gu": 9, "gui": [1, 10], "gui_cal_metr": 3, "guid": [1, 4], "gujral": 5, "guo": 5, "h": [0, 5, 6, 9, 10, 13, 14], "ha": [4, 10, 12, 13], "had": 6, "half": 9, "hand": 7, "handl": [0, 14], "hat": [4, 5, 7, 8, 9], "hauskrecht": 5, "have": [0, 1, 4, 6, 7, 8, 9, 10, 11, 14], "have_subgroup": [0, 2, 13], "header": [0, 2, 10], "heavili": 5, "help": 10, "helper": 10, "here": 9, "high": 4, "higher": 0, 
"hist": [6, 12], "histtyp": [6, 12], "hl": [0, 1, 4, 8, 10, 12, 13, 14], "hl_c_p": 6, "hl_c_pvalu": 6, "hl_c_t": 6, "hl_h_p": 6, "hl_h_pvalu": 6, "hl_h_t": 6, "hope": 1, "horsch": 9, "hosmer": [0, 1, 8, 12], "hosmer_lemeshow_test": [0, 2, 6], "hot": 0, "how": [4, 5, 6, 8, 9], "howev": [5, 6, 7, 9, 12], "hsu": 9, "http": [4, 5, 7, 9, 10, 11, 12], "huang": [4, 7], "hyperparamet": [7, 12], "hypothesi": [6, 12], "i": [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], "ibarra": 5, "ici": [0, 1, 4, 8, 9, 10, 13, 14], "idea": 7, "ideal": 6, "ident": 0, "ig": 7, "ignor": 5, "illur": 10, "imbalanc": 9, "implement": [6, 7], "implicitli": 9, "import": [4, 5, 6, 7, 9, 10, 11, 12, 13, 14], "improv": 1, "inch": 0, "includ": [0, 1, 4, 8, 10], "incorrect": 6, "increas": [5, 11], "independ": [0, 9], "index": [0, 1, 8], "indic": [0, 4, 6], "industri": 6, "inform": [0, 11], "informat": [4, 7], "initi": 0, "input": [0, 10, 13], "insid": 10, "inspir": 12, "instabl": 0, "instal": [1, 3], "instead": [0, 6, 10], "int": [0, 9, 11], "int_0": [5, 7], "intecept": 1, "integr": [0, 1, 8], "intellig": [5, 9], "intend": [1, 8], "interact": 4, "intercept": [0, 1, 7, 8, 9, 10, 13, 14], "intercept_ci": 0, "interest": [0, 1, 4, 5, 8, 10, 13, 14], "interestingli": 7, "interfac": [1, 13], "intern": 0, "interpol": 0, "interpret": [5, 6, 7, 14], "interv": [0, 1, 4, 10, 11], "intrins": 9, "intuit": [5, 6], "invers": 0, "is_equal_freq": [0, 5, 6], "iter": 4, "itself": 7, "j": [5, 9, 11, 12, 13], "jamia": [4, 7], "jason": 1, "jerfel": 5, "journal": [4, 7, 12], "just": [5, 6, 13], "k": [5, 7, 9], "kei": [0, 1, 4, 6, 9, 12], "kenett": 4, "keyword": 0, "kira": 9, "kl": [9, 12], "kwarg": 0, "kwok": 1, "kxq045": 9, "l": [4, 5, 6, 7, 9, 11], "label": [0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], "last": 10, "lead": [4, 9, 11], "learn": [1, 5, 8], "left": [6, 9], "legend": [5, 6, 7, 12], "lemesbow": 6, "lemeshow": [0, 1, 8, 12], "len": [5, 9, 11], "level": 0, "li": [4, 7], "librari": 11, "like": [0, 4, 7, 12], 
"likelihood": [4, 9, 11], "limit": 12, "line": [0, 1, 3, 7, 13], "linear": [0, 4, 7], "linear_misc": [0, 2], "linestyl": [6, 12], "list": [0, 4, 6, 10, 12], "literatur": [5, 6], "liu": 9, "ll": 4, "llr": 4, "load": [0, 4, 5, 6, 7, 11], "loader": 10, "loc": [6, 12], "local": [0, 7, 10], "loess": [0, 1, 8, 9, 10, 13, 14], "loess_ici": 7, "loess_ici2": 7, "log": [0, 4, 9], "logical_and": 4, "logist": [0, 4, 6, 7], "logit": [0, 1, 4], "logit_func": [0, 2, 7], "longford": 4, "look": [7, 11], "loop": 13, "loss": [0, 2, 9], "low": [4, 6], "lower": 0, "lowerci": [4, 7, 9, 10, 13, 14], "lowess": 0, "lowess_fit_p": 7, "lowess_fit_p2": 7, "lowess_fit_p2_correct": 7, "lowess_fit_p_correct": 7, "lowess_regression_analysi": [0, 2, 7, 9], "lr": 9, "lt": 5, "lung": 1, "m": [5, 6, 9, 10, 13], "machado": [4, 7], "macheret": [4, 7], "machin": [1, 5, 8], "mai": 5, "main": 7, "make_roc_curv": [0, 2], "manag": 12, "mani": [6, 7, 12, 13], "mannual": 9, "margin": 9, "marker": [7, 9], "mask": [9, 11], "match": 9, "mathbb": [5, 7, 8], "matplolib": 10, "matplotlib": [0, 5, 6, 7, 10, 12], "max": [5, 6, 12], "max_": 5, "max_m": 5, "max_p": 5, "maximum": [0, 1, 4], "mce": [0, 1, 4, 10, 13, 14], "mce_": 8, "mce_c_classon": 5, "mce_c_top_class": 5, "mce_h_classon": 5, "mce_h_top_class": 5, "mean": [0, 4, 5, 6, 7, 8, 9, 11, 12], "meaning": [1, 8], "measur": [0, 4, 5, 7, 12], "medic": [4, 7, 9], "medicin": 7, "messag": 10, "meteorolog": 12, "method": [0, 4, 5, 6, 7, 9, 11], "metric": [1, 2, 4, 5, 6, 7, 9, 10, 12, 13, 14], "metrics_c": 13, "metz": 9, "mi": [9, 11], "might": [9, 11], "min_": 9, "minim": [0, 9], "miscal_dataload": 7, "miscal_funct": 0, "miscal_scal": [0, 10], "miscalibr": [0, 4, 7, 10, 12], "mislead": 11, "mle": 4, "model": [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12], "moder": [4, 8], "modern": 5, "modifi": 5, "modul": 2, "monoton": 9, "mont": 0, "more": [0, 4, 8, 10, 11, 13], "most": [5, 7, 8, 14], "much": 5, "multi": [0, 1, 8, 10, 14], "multiclass": [0, 1], "multipl": [0, 6], "must": [0, 
10], "n": [4, 5, 7, 9, 10, 12, 13], "n_bootstrap": 10, "n_class": 0, "n_m": 6, "n_point": 0, "n_sampl": 0, "naeini": 5, "name": [0, 12], "nan": [0, 4], "nbin": 0, "ndarrai": 0, "necessari": 11, "need": [0, 3, 4, 6, 7, 8, 10], "network": 5, "neural": 5, "new": [0, 4, 5, 9], "nicegui": [3, 10], "nixon": 5, "non": [6, 7, 12], "none": [0, 5, 11], "nonrobust": 4, "normal": 12, "note": 0, "notebook": [4, 8, 9, 10, 14], "notic": [5, 6, 7, 9, 10], "now": 14, "np": [4, 5, 6, 7, 9, 11, 12, 13], "null": [4, 6, 12], "num_bin": [0, 5, 6, 7, 10, 11, 13, 14], "number": [0, 4, 5, 6, 10], "numer": 0, "numpi": [0, 4, 5, 6, 7, 9, 10, 11, 12, 13], "o_": 6, "object": 0, "observ": [0, 4, 5, 6, 8], "obtain": 5, "oct": 4, "ocz228": [4, 7], "odd": 4, "off": 7, "offer": [1, 6], "often": 6, "ohno": [4, 7], "one": [0, 10, 13], "ones": [9, 11], "onli": [0, 1, 4, 5, 8, 10, 11, 13, 14], "onlin": 4, "oper": 0, "opposit": [5, 6], "optim": [0, 9], "optimal_preval": [0, 9], "optimal_prevalence_adjust": [0, 2], "option": [0, 4, 6, 10], "order": [0, 6, 10, 13], "org": [4, 5, 7, 9, 11, 12], "origin": [4, 10], "other": [0, 4, 5, 6, 7, 8, 9, 11, 12, 13], "otherwis": 0, "outcom": [0, 4, 9], "output": [0, 9, 10], "over": [4, 5, 8], "overal": [4, 10, 13], "overconfid": 11, "p": [0, 1, 4, 5, 7, 8, 9, 10, 13, 14], "p_": [4, 5, 14], "p_i": [7, 9, 12], "p_valu": [0, 12], "packag": [1, 2, 4, 10, 11, 12, 13], "pakdaman": 5, "paramet": 0, "parametr": [6, 7, 12], "part": 5, "particular": 5, "particularli": 1, "pass": 0, "path": [0, 10], "patient": 12, "peopl": 7, "pepe": 9, "per": 0, "percentil": 0, "perfect": 4, "perfectli": 7, "perform": [0, 4, 9, 10, 13], "perform_pervalance_adjust": [0, 9], "perhap": 5, "petrick": 9, "pezeshk": 9, "piegorsch": 4, "pip": [3, 10], "pleas": [1, 8], "pleiss": 5, "plot": [0, 5, 7, 8, 10, 11, 13, 14], "plot_bin": 10, "plot_reliability_diagram": [0, 2, 7, 11, 14], "plot_roc_curv": [0, 2], "plote": 7, "plt": [5, 6, 7, 12], "png": 10, "point": 0, "polynomi": 7, "poor": 0, "poorli": 0, 
"popul": [1, 8, 9, 13], "posit": 0, "posterior": [9, 11], "power": [6, 12], "pp": 4, "practic": 9, "predict": [0, 1, 4, 5, 6, 7, 8, 9, 11, 12, 14], "predictor": 4, "preform": [1, 13], "prepar": 10, "preprocess": 0, "presenc": 0, "present": 0, "preval": [0, 1, 4, 8, 10, 11, 12], "prevalecn": 10, "prevalence_adjust": 10, "previou": 14, "print": [0, 4, 5, 6, 7, 9, 10, 12, 13], "print_result": [0, 4, 7], "pro": [1, 8], "prob": [0, 2, 4, 5, 6, 7, 10, 11, 12, 13, 14], "proba": 13, "proba_0": [0, 10, 13], "proba_1": [0, 10, 13], "proba_adjust": 9, "proba_class1": 7, "proba_n": [10, 13], "probabilist": [0, 5, 6, 12], "probabiliticst": 0, "probabl": [0, 4, 5, 6, 7, 8, 9, 10, 11, 14], "problem": [0, 5, 6, 8, 9, 10, 11, 14], "proce": 11, "proceed": 5, "process": [1, 10], "produc": 9, "prognost": 4, "program": 10, "project": 1, "proof": 6, "proper": 12, "proport": 0, "propos": [5, 12], "prove": 9, "provid": [0, 1, 6, 8, 9, 10, 11, 12, 13], "pseudo": 4, "purpos": 10, "py": [3, 10, 13], "pyplot": [5, 6, 7, 12], "python": [1, 3], "q": 5, "qian": 1, "qj": 12, "quantifi": 7, "quarterli": 12, "quick": 1, "quickli": [5, 6], "quickstart": 13, "r": [4, 6, 7, 12], "radiologi": 9, "random": [4, 6, 9, 11, 12], "random_se": 10, "rang": [4, 5, 6, 12], "range_of_bin": 5, "rate": 0, "ratio": [9, 11], "raw": 0, "re": 9, "read": [1, 13], "reader": 5, "real": 13, "rearrang": 9, "reason": [5, 11], "recalibr": 4, "receiv": 0, "recommend": [0, 5, 6, 8, 12], "red": 11, "refer": [1, 8], "regress": [0, 4, 6, 7], "regular": 14, "reject": 6, "relat": [7, 11], "relationship": 9, "reli": [5, 9], "reliability_0": 11, "reliability_1": 11, "reliability_diagram": [0, 2, 4, 5, 6, 7, 11, 12, 14], "reliabl": [0, 1, 5, 6, 7, 8, 10, 12, 13, 14], "relibail": 10, "remind": 5, "remov": 0, "removing_nan": [0, 2], "replac": [0, 9, 11], "repositori": 10, "reprens": [1, 8], "repres": [0, 1, 8], "represent": 5, "requir": [6, 7, 10, 12], "resampl": 0, "research": 9, "residu": [0, 4], "resolut": [0, 9, 12], "respect": 9, 
"rest": [1, 8, 14], "result": [0, 4, 5, 6, 7, 8, 12, 13], "return": [0, 5, 9], "return_fig": 0, "return_numpi": [0, 4, 5, 6, 12], "reveal": 11, "reweight": 0, "right": [6, 9, 12], "risk": 4, "robust": 7, "roc": 0, "roc_auc": 0, "roc_curv": 0, "rough": 5, "round": [6, 12], "row": 0, "royal": 12, "ruggeri": 4, "rule": 12, "run": [0, 1, 10, 13], "sahin": 9, "sai": 9, "same": [0, 5, 7, 9, 10, 13, 14], "sampl": [0, 5, 6, 7, 10, 11, 14], "sample_s": [0, 4, 6, 12], "samuelson": 9, "save": [0, 10], "save_diagram_output": 10, "save_metr": 10, "save_path": 0, "save_plot": 10, "scale": [0, 9], "scatter": 7, "scatterplot": [0, 7], "scheme": [5, 6], "scipi": [0, 10, 11], "scope": 4, "score": [0, 1, 4, 6, 9, 10, 11, 13, 14], "script": [1, 10], "search": 9, "second": [0, 5, 12], "section": [5, 9, 11], "see": [4, 5, 6, 8, 10, 11, 12, 13, 14], "seed": [4, 6, 9, 12], "select": 0, "self": 0, "sens": [8, 11], "sensit": 0, "separ": 10, "seri": [11, 14], "set": [5, 9, 11], "sever": 6, "shape": 0, "shift": [1, 4, 8, 11, 12], "should": [0, 6, 7, 8, 10, 13], "show": [0, 5, 6, 10, 11, 12], "shown": [4, 5, 6, 12], "signific": [0, 6], "sim": [6, 7], "similar": [5, 6], "similarili": 5, "similarli": [4, 5], "simpl": [5, 6, 13], "simpler": 5, "simpli": [3, 5, 10], "simplic": 5, "simul": [0, 13], "simulated_data_subgroup": [10, 13], "simulated_data_subgroup_result": 10, "simulated_misdata": [7, 10], "simulated_misdata_result": 10, "simulated_welldata": [4, 5, 6, 7, 10, 11, 12, 14], "simulated_welldata_diagram_output": 10, "simulated_welldata_result": 10, "simulation_s": [4, 6, 12], "sinc": [4, 6, 9, 11], "situat": 12, "size": [0, 1, 9, 11], "size_c": 6, "size_h": 6, "skip": 7, "slope": [0, 1, 8], "small": 0, "smith": 11, "smooth": [0, 7], "smoothed_proba": 0, "so": 9, "societi": 12, "softmax": [0, 11], "softmax_to_logit": [0, 2, 11], "sole": 8, "some": [4, 5, 6, 12, 13], "sort": [0, 7], "sorted_proba": 0, "sourc": 0, "space": [0, 1, 8], "span": [0, 7], "special": 11, "specif": [8, 9, 10], 
"specifi": [0, 6, 8, 10, 14], "speigelhalt": 12, "spieegelhalt": 1, "spiegelhalt": [0, 1, 8], "spiegelhalter_z_test": [0, 2, 12], "spiegelhalterz": [0, 10, 12, 13, 14], "spline": 7, "sqrt": 12, "squ": 4, "squar": [0, 6, 12], "standard": [6, 12], "start": 1, "stat08078": 4, "statement": 6, "statist": [0, 1, 4, 7, 9, 12], "statistician": 12, "statsmodel": 10, "statsref": 4, "std": 4, "step": [0, 6, 12], "steyerberg": [4, 7], "still": [4, 6, 7, 11, 14], "str": [0, 6, 12], "strongli": 8, "structur": 0, "studi": 6, "subgroup": [0, 1, 2, 10], "subgroup_1": [0, 10, 13], "subgroup_1_group_a": [10, 13], "subgroup_1_group_b": [10, 13], "subgroup_2": [0, 10, 13], "subgroup_class": 13, "subgroup_column": 13, "subgroup_indic": [0, 2, 13], "subgroup_m": [10, 13], "subgroups_class": [0, 2, 13], "subgroups_index": [0, 2, 13], "subject": 4, "submodul": 2, "success": 11, "suffici": [1, 8, 12], "suggest": [0, 6, 8], "suit": 1, "sum": 5, "sum_": [5, 6, 7, 9, 12], "summari": [0, 1], "sun": 5, "support": [0, 3], "t": [0, 4, 5, 6, 7, 9, 10, 12], "take": [0, 8], "tannen": 5, "target": 4, "task": 1, "techniqu": 4, "tell": 4, "tempresult": [4, 6, 12], "term": [5, 7, 9, 12], "test": [0, 1, 8, 10, 11], "test_dataload": 11, "text": [5, 6, 7, 9, 12, 14], "th": [5, 6, 7], "than": [4, 6], "thei": [5, 7], "them": [0, 8, 10], "theorem": 9, "theoret": 1, "theori": 6, "therefor": [5, 6, 7, 8, 9], "thi": [0, 1, 4, 5, 6, 9, 10, 13], "thing": 13, "those": 0, "through": [7, 13], "tian": 9, "time": [4, 9], "titl": [0, 5, 6, 7, 11, 12, 14], "tool": [0, 1, 6, 10, 11], "top": [0, 1, 5, 8, 10, 11, 14], "topclass": [5, 9, 10, 13, 14], "total": [5, 6], "tpr": 0, "tradition": 5, "train": 6, "tran": 5, "transform": [0, 8, 9, 10, 14], "transform_topclass": [0, 2, 14], "transformed_data": 14, "treat": [0, 10], "trend": 4, "trial": [11, 12], "true": [0, 1, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14], "truth": 7, "try": [7, 10], "tune": 12, "tupl": 0, "turn": 10, "tutori": [4, 7, 10], "two": [4, 5, 6, 9, 13], "tygert": 5, 
"type": [0, 4, 5, 7], "typic": [0, 6], "u": [4, 5, 11], "ubgroup_2": 10, "uitliti": 0, "under": [0, 3, 4, 8, 12], "understand": 6, "uniqu": 0, "up": 5, "upper": [0, 6, 12], "upperci": [4, 7, 9, 10, 13, 14], "us": [0, 1, 3, 4, 5, 6, 8, 9, 11, 13, 14], "usag": 10, "useag": 10, "user": [4, 14], "usual": [4, 6, 9], "util": [2, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14], "v": [1, 4, 5, 8, 10, 14], "v29i1": 5, "valid": 6, "valu": [0, 1, 4, 10, 11, 13, 14], "var": 12, "variabl": 4, "varianc": 12, "variou": [0, 1], "verbos": [10, 13], "veri": [6, 11, 12], "versa": 4, "version": [0, 5], "vi": [2, 7, 11, 14], "vice": 4, "visual": [0, 1, 8, 10, 11], "vline": [6, 12], "vstack": 11, "w": [4, 5, 6, 7, 9], "wa": 12, "waf993": 11, "wai": [4, 5, 6, 7, 13, 14], "want": [5, 10], "warn": 4, "we": [1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], "weak": 4, "weather": 11, "wed": 4, "weight": [0, 5, 7], "weinberg": 5, "well": [0, 4, 5, 6, 7, 10, 11, 12, 14], "wellcal_dataload": [4, 5, 6, 7, 11, 12], "when": [0, 5, 6, 8, 9, 10, 14], "where": [0, 5, 6, 7, 9, 10, 11, 12, 13], "whether": [0, 4, 6, 8, 9, 10, 12, 13, 14], "which": [0, 4, 5, 7, 8, 9, 10, 11, 12], "while": [4, 5, 6, 7], "who": 12, "whole": 10, "wide": [0, 5, 6], "width": [5, 6, 7], "wilei": 4, "wilson": [0, 11], "window": 7, "within": [0, 9, 13], "without": 0, "word": 9, "work": [1, 6, 8, 10, 14], "workstat": 9, "world": 13, "wrong": 6, "x": [0, 4, 6, 7, 9, 12], "x1": 4, "x_1": 9, "x_2": 9, "x_i": 12, "xlabel": [5, 6, 7, 12], "xu": 5, "y": [0, 4, 5, 6, 7, 8, 9, 12], "y_": 5, "y_i": 9, "y_predict": 0, "y_proba": 0, "y_true": 0, "ylabel": [6, 7, 12], "ylim": 5, "ymax": [6, 12], "ymin": [6, 12], "you": [1, 3, 4, 7, 8, 10, 13, 14], "your": [1, 10], "z": [0, 1, 4, 8, 9], "z_score": [0, 12], "zero": 5, "zhang": 5}, "titles": ["calzone package", "Welcome to the documentation for calzone", "calzone", "Running GUI", "COX calibration analysis", "Exepected Calibration Error(ECE) and Maximum Calibration Error (MCE)", "Hosmer-Lemeshow test (HL test)", 
"Integrated Calibration Index (ICI)", "Summary and guide for calzone", "Prevalence adjustment", "Quick Start", "Reliability diagram", "Spiegelhalter\u2019s Z-test", "Subgroup analysis", "Multiclass extension"], "titleterms": {"": 12, "adjust": 9, "analysi": [4, 13], "background": [4, 5, 6, 7, 12], "bin": 5, "calcul": [4, 5, 6, 7, 12], "calibr": [4, 5, 7, 8], "calzon": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 12], "class": 9, "command": 10, "con": [4, 5, 6, 7, 12], "constant": 9, "content": [0, 1], "cox": [4, 7], "curv": 7, "diagram": 11, "document": 1, "ec": 5, "error": 5, "estim": 5, "exepect": 5, "extens": 14, "fit": 7, "function": 5, "gui": 3, "guid": 8, "hl": 6, "hosmer": 6, "ici": 7, "index": 7, "instal": 10, "intecept": 4, "integr": 7, "intercept": 4, "interest": 9, "interfac": 10, "lemeshow": 6, "line": 10, "loess": 7, "logit": 9, "maximum": 5, "mce": 5, "metric": [0, 8], "modul": 0, "multiclass": 14, "p": [6, 12], "packag": 0, "preform": 9, "preval": 9, "pro": [4, 5, 6, 7, 12], "python": 10, "quick": 10, "refer": [4, 5, 6, 7, 9, 11, 12], "reliabl": 11, "run": 3, "score": 12, "shift": 9, "size": [4, 5, 6, 12], "slope": 4, "spieegelhalt": 12, "spiegelhalt": 12, "start": 10, "statist": 6, "subgroup": 13, "submodul": 0, "summari": 8, "test": [4, 6, 12], "theoret": [4, 5, 6, 7, 12], "us": [7, 10, 12], "util": 0, "valu": [6, 12], "vi": 0, "visual": 7, "welcom": 1, "z": 12}})
\ No newline at end of file
+Search.setIndex({"alltitles": {"COX calibration analysis": [[4, null]], "Calculating Cox slope and intercept with calzone": [[4, "Calculating-Cox-slope-and-intercept-with-calzone"]], "Calculating ECE and MCE with calzone": [[5, "Calculating-ECE-and-MCE-with-calzone"]], "Calculating HL test statistics and p-value with calzone": [[6, "Calculating-HL-test-statistics-and-p-value-with-calzone"]], "Calculating LOESS ICI and COX ICI using calzone": [[7, "Calculating-LOESS-ICI-and-COX-ICI-using-calzone"]], "Calculating the Spieegelhalter Z score and p-value using calzone": [[12, "Calculating-the-Spieegelhalter-Z-score-and-p-value-using-calzone"]], "Command line interface": [[10, "Command-line-interface"]], "Cons of Cox calibration analysis": [[4, "Cons-of-Cox-calibration-analysis"]], "Cons of ECE and MCE": [[5, "Cons-of-ECE-and-MCE"]], "Cons of HL Test": [[6, "Cons-of-HL-Test"]], "Cons of ICI": [[7, "Cons-of-ICI"]], "Cons of Spiegelhalter\u2019s Z test": [[12, "Cons-of-Spiegelhalter's-Z-test"]], "Contents:": [[1, null]], "ECE and MCE as function of bin size": [[5, "ECE-and-MCE-as-function-of-bin-size"]], "Estimated ECE and MCE": [[5, "Estimated-ECE-and-MCE"]], "Exepected Calibration Error(ECE) and Maximum Calibration Error (MCE)": [[5, null]], "Guide to calzone and calibration metrics": [[8, "Guide-to-calzone-and-calibration-metrics"]], "Hosmer-Lemeshow test (HL test)": [[6, null]], "Installation": [[10, "Installation"]], "Integrated Calibration Index (ICI)": [[7, null]], "Module contents": [[0, "module-calzone"]], "Multiclass extension": [[14, null]], "Preform prevalence adjustment in calzone": [[9, "Preform-prevalence-adjustment-in-calzone"]], "Prevalence adjustment": [[9, null]], "Prevalence adjustment and constant shift in logit of class-of-interest": [[9, "Prevalence-adjustment-and-constant-shift-in-logit-of-class-of-interest"]], "Pros of Cox calibration analysis": [[4, "Pros-of-Cox-calibration-analysis"]], "Pros of ECE and MCE": [[5, "Pros-of-ECE-and-MCE"]], "Pros 
of HL test": [[6, "Pros-of-HL-test"]], "Pros of ICI": [[7, "Pros-of-ICI"]], "Pros of Spiegelhalter\u2019s Z test": [[12, "Pros-of-Spiegelhalter's-Z-test"]], "Quick Start": [[10, null]], "Reference": [[5, "Reference"], [6, "Reference"], [7, "Reference"], [12, "Reference"]], "References": [[4, "References"], [9, "References"], [11, "References"]], "Reliability diagram": [[11, null]], "Running GUI": [[3, null]], "Size of COX slope and intecept test": [[4, "Size-of-COX-slope-and-intecept-test"]], "Size of HL test": [[6, "Size-of-HL-test"]], "Spiegelhalter\u2019s Z-test": [[12, null]], "Subgroup analysis": [[13, null]], "Submodules": [[0, "submodules"]], "Summary and guide for calzone": [[8, null]], "Testing the size of Spiegelhalter\u2019s z test": [[12, "Testing-the-size-of-Spiegelhalter's-z-test"]], "Theoretical Background": [[4, "Theoretical-Background"], [5, "Theoretical-Background"], [6, "Theoretical-Background"], [7, "Theoretical-Background"]], "Theoretical background": [[12, "Theoretical-background"]], "Using calzone in python": [[10, "Using-calzone-in-python"]], "Visualization of the fitted curve": [[7, "Visualization-of-the-fitted-curve"]], "Welcome to the documentation for calzone": [[1, null]], "calzone": [[2, null]], "calzone package": [[0, null]], "calzone.metrics module": [[0, "module-calzone.metrics"]], "calzone.utils module": [[0, "module-calzone.utils"]], "calzone.vis module": [[0, "module-calzone.vis"]]}, "docnames": ["calzone", "index", "modules", "notebooks/GUI", "notebooks/cox", "notebooks/ece_mce", "notebooks/hl_test", "notebooks/ici", "notebooks/metrics_summary", "notebooks/prevalence_adjustment", "notebooks/quickstart", "notebooks/reliability_diagram", "notebooks/spiegelhalter_z", "notebooks/subgroup", "notebooks/topclass"], "envversion": {"nbsphinx": 4, "sphinx": 63, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, 
"sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1}, "filenames": ["calzone.rst", "index.rst", "modules.rst", "notebooks/GUI.ipynb", "notebooks/cox.ipynb", "notebooks/ece_mce.ipynb", "notebooks/hl_test.ipynb", "notebooks/ici.ipynb", "notebooks/metrics_summary.ipynb", "notebooks/prevalence_adjustment.ipynb", "notebooks/quickstart.ipynb", "notebooks/reliability_diagram.ipynb", "notebooks/spiegelhalter_z.ipynb", "notebooks/subgroup.ipynb", "notebooks/topclass.ipynb"], "indexentries": {"__init__() (calzone.metrics.calibrationmetrics method)": [[0, "calzone.metrics.CalibrationMetrics.__init__", false]], "__init__() (calzone.utils.data_loader method)": [[0, "calzone.utils.data_loader.__init__", false], [0, "id0", false]], "__init__() (calzone.utils.fake_binary_data_generator method)": [[0, "calzone.utils.fake_binary_data_generator.__init__", false]], "abraitary_miscal() (calzone.utils.fake_binary_data_generator method)": [[0, "calzone.utils.fake_binary_data_generator.abraitary_miscal", false]], "alpha_val (calzone.utils.fake_binary_data_generator attribute)": [[0, "calzone.utils.fake_binary_data_generator.alpha_val", false]], "apply_prevalence_adjustment() (in module calzone.utils)": [[0, "calzone.utils.apply_prevalence_adjustment", false]], "beta_val (calzone.utils.fake_binary_data_generator attribute)": [[0, "calzone.utils.fake_binary_data_generator.beta_val", false]], "bootstrap() (calzone.metrics.calibrationmetrics method)": [[0, "calzone.metrics.CalibrationMetrics.bootstrap", false]], "cal_ici() (in module calzone.metrics)": [[0, "calzone.metrics.cal_ICI", false]], "cal_ici_cox() (in module calzone.metrics)": [[0, "calzone.metrics.cal_ICI_cox", false]], "calculate_ece_mce() (in module calzone.metrics)": [[0, "calzone.metrics.calculate_ece_mce", false]], "calculate_metrics() (calzone.metrics.calibrationmetrics method)": [[0, 
"calzone.metrics.CalibrationMetrics.calculate_metrics", false]], "calibrationmetrics (class in calzone.metrics)": [[0, "calzone.metrics.CalibrationMetrics", false]], "calzone": [[0, "module-calzone", false]], "calzone.metrics": [[0, "module-calzone.metrics", false]], "calzone.utils": [[0, "module-calzone.utils", false]], "calzone.vis": [[0, "module-calzone.vis", false]], "cox_regression_analysis() (in module calzone.metrics)": [[0, "calzone.metrics.cox_regression_analysis", false]], "data (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.data", false]], "data_loader (class in calzone.utils)": [[0, "calzone.utils.data_loader", false]], "data_path (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.data_path", false]], "fake_binary_data_generator (class in calzone.utils)": [[0, "calzone.utils.fake_binary_data_generator", false]], "find_optimal_prevalence() (in module calzone.utils)": [[0, "calzone.utils.find_optimal_prevalence", false]], "generate_data() (calzone.utils.fake_binary_data_generator method)": [[0, "calzone.utils.fake_binary_data_generator.generate_data", false]], "get_ci() (in module calzone.metrics)": [[0, "calzone.metrics.get_CI", false]], "have_subgroup (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.have_subgroup", false]], "header (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.Header", false]], "hosmer_lemeshow_test() (in module calzone.metrics)": [[0, "calzone.metrics.hosmer_lemeshow_test", false]], "labels (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.labels", false]], "linear_miscal() (calzone.utils.fake_binary_data_generator method)": [[0, "calzone.utils.fake_binary_data_generator.linear_miscal", false]], "logit_func() (in module calzone.metrics)": [[0, "calzone.metrics.logit_func", false]], "loss() (in module calzone.utils)": [[0, "calzone.utils.loss", false]], "lowess_regression_analysis() (in module calzone.metrics)": [[0, 
"calzone.metrics.lowess_regression_analysis", false]], "make_roc_curve() (in module calzone.utils)": [[0, "calzone.utils.make_roc_curve", false]], "module": [[0, "module-calzone", false], [0, "module-calzone.metrics", false], [0, "module-calzone.utils", false], [0, "module-calzone.vis", false]], "optimal_prevalence_adjustment() (calzone.metrics.calibrationmetrics method)": [[0, "calzone.metrics.CalibrationMetrics.optimal_prevalence_adjustment", false]], "plot_reliability_diagram() (in module calzone.vis)": [[0, "calzone.vis.plot_reliability_diagram", false]], "plot_roc_curve() (in module calzone.vis)": [[0, "calzone.vis.plot_roc_curve", false]], "probs (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.probs", false]], "reliability_diagram() (in module calzone.utils)": [[0, "calzone.utils.reliability_diagram", false]], "removing_nan() (in module calzone.utils)": [[0, "calzone.utils.removing_nan", false]], "softmax_to_logits() (in module calzone.utils)": [[0, "calzone.utils.softmax_to_logits", false]], "spiegelhalter_z_test() (in module calzone.metrics)": [[0, "calzone.metrics.spiegelhalter_z_test", false]], "subgroup_indices (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.subgroup_indices", false]], "subgroups (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.subgroups", false]], "subgroups_class (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.subgroups_class", false]], "subgroups_index (calzone.utils.data_loader attribute)": [[0, "calzone.utils.data_loader.subgroups_index", false]], "transform_topclass() (calzone.utils.data_loader method)": [[0, "calzone.utils.data_loader.transform_topclass", false], [0, "id1", false]]}, "objects": {"": [[0, 0, 0, "-", "calzone"]], "calzone": [[0, 0, 0, "-", "metrics"], [0, 0, 0, "-", "utils"], [0, 0, 0, "-", "vis"]], "calzone.metrics": [[0, 1, 1, "", "CalibrationMetrics"], [0, 3, 1, "", "cal_ICI"], [0, 3, 1, "", "cal_ICI_cox"], [0, 3, 1, 
"", "calculate_ece_mce"], [0, 3, 1, "", "cox_regression_analysis"], [0, 3, 1, "", "get_CI"], [0, 3, 1, "", "hosmer_lemeshow_test"], [0, 3, 1, "", "logit_func"], [0, 3, 1, "", "lowess_regression_analysis"], [0, 3, 1, "", "spiegelhalter_z_test"]], "calzone.metrics.CalibrationMetrics": [[0, 2, 1, "", "__init__"], [0, 2, 1, "", "bootstrap"], [0, 2, 1, "", "calculate_metrics"], [0, 2, 1, "", "optimal_prevalence_adjustment"]], "calzone.utils": [[0, 3, 1, "", "apply_prevalence_adjustment"], [0, 1, 1, "", "data_loader"], [0, 1, 1, "", "fake_binary_data_generator"], [0, 3, 1, "", "find_optimal_prevalence"], [0, 3, 1, "", "loss"], [0, 3, 1, "", "make_roc_curve"], [0, 3, 1, "", "reliability_diagram"], [0, 3, 1, "", "removing_nan"], [0, 3, 1, "", "softmax_to_logits"]], "calzone.utils.data_loader": [[0, 4, 1, "", "Header"], [0, 2, 1, "id0", "__init__"], [0, 4, 1, "", "data"], [0, 4, 1, "", "data_path"], [0, 4, 1, "", "have_subgroup"], [0, 4, 1, "", "labels"], [0, 4, 1, "", "probs"], [0, 4, 1, "", "subgroup_indices"], [0, 4, 1, "", "subgroups"], [0, 4, 1, "", "subgroups_class"], [0, 4, 1, "", "subgroups_index"], [0, 2, 1, "id1", "transform_topclass"]], "calzone.utils.fake_binary_data_generator": [[0, 2, 1, "", "__init__"], [0, 2, 1, "", "abraitary_miscal"], [0, 4, 1, "", "alpha_val"], [0, 4, 1, "", "beta_val"], [0, 2, 1, "", "generate_data"], [0, 2, 1, "", "linear_miscal"]], "calzone.vis": [[0, 3, 1, "", "plot_reliability_diagram"], [0, 3, 1, "", "plot_roc_curve"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "function", "Python function"], "4": ["py", "attribute", "Python attribute"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:function", "4": "py:attribute"}, "terms": {"": [0, 1, 4, 5, 6, 7, 8, 9, 10, 11], "0": [0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], "000": [4, 10, 13], "00037947714112375366": 13, "001": [0, 7, 10], "0012819521292332472": 14, 
"002": 10, "003": 10, "0038637011857438034": 14, "004": 10, "0046758796391944": 9, "005": 10, "00558856942568957": [7, 10, 13], "005610391483826338": [4, 7, 10, 13], "006": [10, 13], "007": 10, "007508966220374058": 9, "008": 10, "008733966945443138": [5, 10, 13], "008745511902314453": 9, "009": [10, 13], "009313116424641145": 14, "009458033653818828": [5, 10, 13, 14], "009458033653818974": 14, "009608653731328977": [5, 10, 13, 14], "009608653731329372": 14, "01": [10, 13], "010355911839501922": 9, "011": 10, "012": [10, 13], "01208775955804901": [5, 10, 13], "013671230516636386": 9, "014081013182402267": 9, "016": 10, "017": 10, "018": 10, "019": 10, "02": [4, 10, 13], "020": 10, "020515047600205394": 14, "020515047600205505": [5, 10, 13, 14], "021": [10, 13], "022": [9, 10], "023": [10, 13], "02324031223486256": [5, 10, 13], "025": [4, 10], "028": 10, "029": 4, "029403495083063648": 9, "029652031234071": 14, "03": 10, "031": 10, "032": 10, "03353272525376893": [4, 7, 10, 13], "034": [4, 10, 13], "035": 10, "036": 10, "037": [10, 13], "038": [10, 13], "039": [10, 13], "039000000000000035": 4, "03926468843081932": 14, "03926468843081976": [5, 10, 13, 14], "04": 9, "040": 4, "041": 10, "042": [10, 13], "043": 10, "044": 10, "04497652296600376": [4, 7, 10, 13], "045": [10, 13], "0450": 4, "04599": 5, "046": 10, "047": 6, "048": [10, 13], "04848338618970194": [5, 10, 13], "049": [10, 12, 13], "05": [0, 6, 12], "050": 10, "051": [4, 10, 13], "0512096313774626": [4, 7, 10, 13], "052": 10, "054": 10, "055": [6, 10, 13], "056": 10, "05600000000000005": 4, "058": 10, "062": 10, "063": [10, 13], "064": [10, 13], "065": [10, 13], "066": 10, "068": 10, "06894458205508802": 9, "069": 10, "06991428582761099": 13, "07": [0, 10, 13], "071": 10, "072": 10, "073": 10, "07356445428053172": 7, "074": [10, 13], "07429481165606829": 13, "07479369479609524": 13, "075": [10, 13], "076": 10, "0765892898243467": 13, "07658928982434714": 13, "07692218401743334": 7, "077": [10, 13], 
"07746407648179383": 13, "07796623141885761": 13, "078": [10, 13], "07872518570110512": 13, "079": [10, 13], "07961758926734244": 9, "08": 10, "081": 10, "083": 10, "0830845954424": 14, "084": 10, "0841517729106883": 9, "0841517733462589": 9, "085": 10, "086": 10, "087": 10, "0884212323912401": 14, "09": 10, "091": 10, "092": 10, "0962280216661371": 9, "0984122810555748": 7, "0x7fe048e8b470": 5, "1": [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], "10": [0, 4, 5, 6, 7, 9, 10, 11, 12, 13], "1000": [0, 4, 6, 10, 12], "10000": [5, 6, 12], "1002": [4, 7, 12], "101": 9, "1016": 9, "1027": 10, "1043": 6, "1069": 6, "107": 10, "108": 10, "109": 10, "1093": [4, 7, 9], "11": 9, "11017646753618837": 14, "111": 10, "1135847825485915": 14, "1138": [10, 13], "114": 10, "115": 10, "1156": 10, "116": [6, 10], "117": 6, "1175": 11, "1177": 9, "11820": 9, "12": 9, "123": [4, 6, 9, 10, 12, 13], "12348577118577644": [4, 7, 10, 13], "124": 10, "125": 10, "126": 10, "127": 10, "12775157222121533": 9, "1292": 10, "13": [10, 11], "1322110153978738": 14, "1327565894838103": 13, "133": [10, 13], "135": [10, 12], "1355": 10, "139": 10, "1393": 10, "1394": 9, "14": 11, "140": [10, 13], "14045600565696226": 13, "1409": 9, "14090872416947742": 13, "141": [10, 13], "1446": 9, "145": 10, "1457": 9, "15": [4, 7, 9, 10, 11, 14], "150": 0, "151": 10, "1512": 12, "1519": 12, "152": 10, "155": 10, "158": 10, "16": 6, "1609": 5, "16250572519432438": 13, "163": [10, 13], "168": 10, "1706": 5, "171": [10, 13], "1714434139853": 13, "175": 10, "18": [10, 13], "182": 10, "1844": 10, "185": 10, "1857": 10, "1927": 4, "1928": 4, "1958": 4, "1979": 7, "1980": 6, "1986": 12, "1997": 6, "1e": [0, 7], "1st": 4, "2": [0, 4, 5, 6, 7, 9, 10, 11, 12, 13], "20": [6, 12], "2007": 11, "2008": [9, 12], "2009": 12, "2010": 9, "2011": 9, "2015": 5, "2017": 5, "2018": [4, 9], "2019": [5, 7], "202": 10, "2020": [4, 5, 7, 9], "2022": 5, "2024": 4, "203": 10, "204": 10, "205": 10, "21": [7, 10], "210": 4, "212": 4, "214": 10, 
"22": 11, "2246": [10, 13], "232": 10, "24": 10, "244": 10, "249": 10, "26": 10, "262": 4, "27": [4, 7, 9, 10, 13], "273": 10, "28": 10, "29": [5, 10], "2_": 6, "2p_i": 12, "3": [0, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14], "300": 5, "31": 10, "319": 10, "327": [10, 13], "33": 10, "3300531914893617": 9, "3341": 10, "335": 10, "34": [4, 10, 12], "3465": 4, "352": [10, 13], "35209071874348785": [6, 10, 13], "368": 7, "37": 10, "376": [10, 13], "37632691618773545": 14, "3763269161877356": [10, 12, 13], "38": 7, "385628": 4, "39": [4, 5, 6, 7, 10, 12, 14], "3900": 10, "391": 10, "392": 10, "396": 10, "4": [0, 4, 5, 6, 7, 9, 10, 12, 14], "40": 11, "4051": 7, "4065": 7, "42": 11, "429": [10, 13], "43": 11, "4385762101924": 13, "439": [10, 13], "443": 10, "4436": 4, "4438": 4, "454": 10, "456": 12, "463": 10, "469": 10, "470": 10, "478": [10, 13], "47838663128188996": 13, "481": 10, "494": 10, "495": 10, "497": 10, "49863799264980607": 9, "4998": 4, "4999": 4, "5": [0, 4, 5, 6, 7, 9, 10, 11, 12, 14], "50": 11, "5000": [4, 9, 10], "507": [10, 13], "5071793536874274": 13, "508": 10, "524": 10, "526": 10, "53": 4, "5359720760929648": 13, "536": [10, 13], "539": 10, "554": 10, "569": 10, "577": 10, "581": 10, "584": 10, "5958290924064796": 9, "6": [0, 4, 5, 6, 10, 14], "611": 10, "621": [4, 7], "626": 10, "631": 10, "633": [4, 7], "637": 10, "638": 10, "641": [10, 13], "643": 12, "651": 11, "661": 11, "664": 10, "666": 10, "668": [10, 13], "67": 10, "681": 10, "686": 10, "6897839569176842": 9, "694947603203135": [6, 10, 13], "695": [10, 13], "696": [10, 13], "698": 10, "7": [0, 4, 6, 10], "7066738713391099": [10, 12, 13], "7066738713391101": 14, "707": [10, 13], "716": 10, "727": 10, "729": 10, "732": [10, 13], "74": 7, "7486601568004448": 14, "778": 10, "779": 10, "781": 10, "7837388214288888": 9, "794": 10, "8": [0, 5, 6, 10, 13], "813": 10, "818": 10, "8281": 7, "829": 7, "836": 7, "842": [10, 13], "87": 9, "8754203499121678": 9, "8754203499121679": 9, "8835446575708198": [6, 
10, 13], "884": [10, 13], "884991559088098": [6, 10, 13], "885": [10, 13], "886": 10, "888": 10, "9": [0, 6, 10], "9097229915817346": 14, "910": [10, 13], "93575342117766": 13, "936": [10, 13], "937": [4, 10, 13], "9372902801721911": [4, 7, 10, 13], "940": 10, "9400481147756811": 9, "942": 10, "95": [10, 11], "96": 0, "9602": 5, "971": 10, "975": 4, "977": 10, "9781118445112": 4, "981": 10, "990": 10, "9939": 4, "994": [10, 13], "9942": 4, "9942499557748269": [4, 7, 10, 13], "996": 10, "9990721119864874": 14, "A": [0, 4, 6, 7, 9, 11, 13], "And": 9, "But": 11, "By": 9, "FOR": 6, "For": [0, 4, 8, 9, 10, 12], "If": [0, 4, 8, 10], "In": [4, 5, 6, 7, 9, 10, 12, 13, 14], "It": [0, 1, 5, 6, 7, 10, 11, 12], "No": [4, 5, 9], "OF": 6, "On": 5, "One": 6, "Or": 10, "THE": 6, "That": 4, "The": [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12], "Then": [3, 10], "There": [4, 5, 6, 10], "These": 0, "To": [1, 3, 4, 7, 8, 9, 10, 14], "_": [5, 6, 12, 14], "__init__": [0, 2], "aaai": 5, "ab": [5, 9], "about": [5, 6, 11, 14], "abov": [5, 9, 10], "abraitary_misc": [0, 2], "absenc": 0, "absolut": 0, "ac": 5, "academ": 9, "acc": 5, "access": 8, "accordingli": 0, "account": 5, "accur": [1, 8, 12], "accuraci": [0, 5, 7, 11], "achiev": 8, "acra": 9, "across": 0, "actual": [0, 7], "adapt": [5, 9], "add": [0, 9], "addint": 9, "addit": 0, "adjust": [0, 1, 4, 8, 10, 11], "adjusted_p": 9, "adjusted_preval": 0, "adjusted_prob": 0, "adjusted_proba": 0, "adopt": 5, "advantag": [4, 6, 7], "affect": [5, 6, 7, 9], "after": [8, 9, 12], "after_preval": 9, "again": [6, 8], "against": [0, 11], "aim": [5, 8], "aka": [4, 9], "al": [5, 6], "alern": 13, "all": [0, 3, 4, 5, 8, 10, 13, 14], "allow": 0, "alpha": [0, 6, 12], "alpha_v": [0, 2, 4, 6, 9, 10, 12], "alreadi": 10, "also": [4, 5, 6, 7, 9, 10, 12, 13], "altern": [4, 5, 7, 10], "alternativli": 14, "although": 4, "alwai": 5, "american": [4, 7], "an": [0, 4, 5, 6, 7, 9, 10, 13], "analysi": [0, 1], "ani": [0, 6, 7, 12], "append": [4, 6, 12], "appli": [0, 8, 9, 10], 
"applic": [0, 4, 9, 13], "apply_prevalence_adjust": [0, 2, 9], "approach": 14, "approxim": 12, "ar": [0, 3, 4, 5, 6, 10, 13], "arang": 5, "arbitrari": [0, 7], "area": 0, "argument": [0, 9, 10], "around": 4, "arrai": [0, 4, 6, 12], "arrieta": 5, "artifici": [5, 11], "arxiv": [5, 9], "assess": [0, 1, 4, 6, 8], "associ": [0, 4, 7], "assum": [0, 9, 10, 11], "assumpt": 6, "auc": 0, "austin": 7, "automat": 10, "avail": 0, "averag": [0, 5, 7, 8, 11], "avoid": 0, "b": [4, 9, 12, 13], "b_m": 5, "back": 0, "background": 1, "bad": 11, "bar": [0, 11], "base": [0, 5, 6], "basic": 10, "batch": 1, "bay": 9, "bayesian": 5, "becaus": 5, "becom": [9, 14], "been": [4, 6], "befor": [1, 9], "before_preval": 9, "being": 9, "below": [4, 5, 11], "bernoulli": 11, "beta": [0, 10], "beta_v": [0, 2, 4, 6, 9, 10, 12], "better": 0, "between": [0, 4, 5, 9], "beyond": [4, 6], "big": 4, "biggest": [5, 6], "bin": [0, 1, 4, 6, 7, 8, 10, 11, 12], "bin_count": [0, 5, 6, 7, 11, 14], "bin_counts_0": 11, "bin_counts_1": 11, "bin_edg": [0, 5, 6, 7, 11, 14], "binari": [0, 1, 4, 8, 9, 10, 11, 14], "binomi": 10, "biostatist": 9, "blue": 11, "bool": [0, 9, 11], "boostrap": 10, "bootstrap": [0, 1, 2, 10], "bootstrap_ci": 10, "both": [0, 4, 5, 6, 7, 9], "bound": 0, "breast": 9, "brier": 12, "brocker": 12, "br\u00f6cker": [11, 12], "c": [0, 5, 6, 7, 9, 10, 13, 14], "cal_ici": [0, 2], "cal_ici_cox": [0, 2, 7], "cal_metr": [4, 6, 10, 12, 13, 14], "calcualt": 8, "calcuat": 11, "calcul": [0, 1, 8, 9, 10, 11], "calculate_ece_mc": [0, 2, 5], "calculate_metr": [0, 2, 4, 5, 6, 7, 9, 10, 12, 13, 14], "calib": 5, "calibbr": 11, "calibr": [0, 1, 6, 9, 10, 11, 12, 13, 14], "calibrationmetr": [0, 2, 4, 5, 6, 7, 9, 10, 12, 13, 14], "call": [4, 5, 6, 7, 12], "callabl": 0, "calmetr": [4, 5, 6, 7, 9, 12], "calster": 4, "calzon": [3, 11, 13, 14], "can": [0, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14], "cancer": 9, "cannot": 6, "cao": 1, "capabl": 1, "captur": [4, 7], "care": 14, "carlo": 0, "case": [4, 7, 9, 10, 12], "caus": 5, "caution": 
8, "cd": 10, "cdot": [4, 9], "certain": 4, "cessi": 6, "chanc": 4, "chang": [4, 5], "characterist": 0, "chart": 0, "check": [0, 6, 12], "chen": 9, "chi": [0, 6], "chi_squar": 0, "choic": [7, 9, 11], "choos": 4, "ci": 10, "claim": 6, "class": [0, 1, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14], "class_1_index": [9, 11], "class_1_sampl": [9, 11], "class_to_calcul": [0, 4, 5, 6, 7, 9, 10, 12, 13, 14], "class_to_plot": [0, 5, 6, 7, 11, 14], "classif": [0, 1, 8, 9, 10, 11, 14], "classifi": 9, "clasto": 11, "clean": 0, "cleveland": 7, "cli": 10, "clinic": [4, 7, 12], "clip": [0, 7], "clone": 10, "close": [0, 4, 7], "closer": 4, "coef": [0, 4, 7, 9, 10, 13, 14], "coef_ci": 0, "coeffici": 0, "color": [0, 6, 7, 12], "column": [0, 10, 13], "com": 10, "comma": 10, "command": [1, 3, 13], "common": [4, 7, 14], "commun": 6, "compar": [0, 6, 12], "comparison": 6, "compon": 9, "comprehens": [1, 8], "comput": [0, 4, 5, 7], "con": [1, 8], "conda": 10, "conf": 5, "confer": 5, "confid": [0, 1, 4, 5, 10, 11], "confind": [0, 5, 6, 7, 11, 14], "confindence_0": 11, "confindence_1": 11, "conjunct": 0, "connect": 0, "consid": [8, 14], "const": 4, "constant": 1, "constraint": 4, "contain": [0, 8, 13], "content": 2, "continu": 9, "control": 4, "convent": 5, "convention": 6, "converg": 4, "convert": [0, 14], "cooper": 5, "correct": [0, 7, 9, 11, 12, 14], "correspond": [0, 8, 10], "could": [5, 7, 11], "count": [0, 5, 6], "covari": 4, "coverag": [1, 8], "cox": [0, 1, 8, 9, 10, 13, 14], "cox_func": 7, "cox_ici": 7, "cox_intercept": [4, 7], "cox_intercept_ci": [4, 7], "cox_intercept_lowerci": 4, "cox_intercept_upperci": 4, "cox_p_correct": 7, "cox_regression_analysi": [0, 2, 4, 7], "cox_slop": [4, 7], "cox_slope_ci": [4, 7], "cox_slope_lowerci": 4, "cox_slope_upperci": 4, "creat": [0, 11, 13], "credit": 1, "cross": [9, 12], "csv": [0, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14], "csv_file": [10, 13], "cumul": 7, "current": [3, 4], "curv": [0, 1], "custom": 0, "custom_color": [0, 11], "d": [4, 5, 6, 9, 12], "data": 
[0, 2, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14], "data_load": [0, 2, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14], "data_path": [0, 2, 4, 5, 6, 7, 11, 12], "dataset": [1, 8, 9, 10, 13, 14], "date": 4, "david": 12, "decil": 6, "decompos": 12, "decomposit": 12, "deep": 5, "default": [0, 6, 10], "defin": [5, 12], "degre": [0, 6], "delta": [0, 7], "demonstr": [5, 9, 11], "demostr": [4, 6, 7], "denomin": 9, "densiti": [0, 6, 12], "dep": 4, "depend": [0, 4, 5, 6, 8, 9, 10], "deriv": [9, 10], "describ": 7, "descript": 8, "detail": [0, 4, 8, 13], "detect": 10, "determin": [0, 6, 8], "develop": [3, 10], "deviat": [5, 7, 8], "df": [0, 4, 6], "df_": [5, 7], "diagnosi": 9, "diagnost": 9, "diagram": [0, 1, 5, 6, 7, 8, 10, 13, 14], "dict": 0, "dictionari": 0, "didsr": 10, "differ": [0, 4, 5, 6, 7, 9, 14], "directli": [5, 7, 9, 10, 12], "directori": [3, 10], "disadvantag": [5, 7], "discrimin": 12, "discuss": [9, 11], "diseas": 9, "displai": 0, "distanc": 0, "distribut": [0, 5, 6, 7, 9, 10, 12], "diverg": [9, 12], "divid": [0, 6], "do": [4, 5, 13], "document": [6, 8], "doe": [6, 12], "doesn": [4, 7, 9, 12], "dof": 10, "doi": [4, 5, 7, 9, 11, 12], "don": 10, "done": [5, 9], "dot": 0, "dpi": 0, "drop": [5, 9, 11], "drop_indic": [9, 11], "dtype": [9, 11], "due": 8, "dure": 13, "dusenberri": 5, "e": [1, 4, 5, 7, 9, 10, 12], "e_": 6, "each": [0, 6, 8, 10, 11, 13, 14], "easi": [5, 6], "easili": 7, "ec": [0, 1, 4, 7, 8, 9, 10, 13, 14], "ece_": 8, "ece_c": 5, "ece_c_classon": 5, "ece_c_top_class": 5, "ece_equal_width": 5, "ece_h": 5, "ece_h_classon": 5, "ece_h_top_class": 5, "ed": 4, "edg": 0, "effect": 5, "eg": 13, "els": [4, 6, 12], "empir": [0, 7], "encod": 0, "end": 10, "entropi": [9, 12], "enumer": 13, "epsilon": 0, "equal": [0, 4, 5, 6, 11, 14], "equat": 9, "equival": [5, 9], "err": 4, "error": [0, 1, 7, 8, 11, 12], "error_bar": [0, 11, 14], "essenti": [1, 7, 8], "estim": [0, 1, 4, 7, 8, 9], "et": [5, 6], "eta": 9, "etc": [1, 13], "evalu": 0, "evenli": 5, "event": [0, 6], "evid": 12, "exact": [4, 
11], "examin": [4, 6, 8], "exampl": [0, 4, 7, 10, 11, 12, 13], "example_data": [4, 5, 6, 7, 10, 11, 12, 13, 14], "exceed": 4, "except": [8, 14], "exepect": 1, "exit": 10, "exp": 0, "expand": 9, "expect": [0, 5, 6, 7, 8], "experi": 5, "experienc": 10, "explan": 8, "explicitli": [5, 6], "extens": [1, 5], "extract": 0, "extrem": 12, "f": [0, 4, 7, 9, 12, 13], "f_": 7, "fact": [9, 12], "factor": [0, 9], "fake": [0, 6], "fake_binary_data_gener": [0, 2, 4, 6, 9, 12], "fakedata_gener": [4, 6, 9, 12], "fals": [0, 4, 5, 6, 7, 9, 10, 11, 12], "fan": 1, "featur": [1, 3], "field": [0, 13], "fig": 7, "figur": 0, "file": [0, 10, 13], "find": [0, 1, 9], "find_optimal_preval": [0, 2, 9], "first": [0, 1, 4, 5, 6, 9, 10, 12], "fit": [0, 1, 4, 6], "fix": [0, 4, 5], "fix_intercept": [0, 4], "fix_slop": [0, 4], "flag": [0, 10], "float": 0, "follow": [0, 3, 5, 6, 9, 10, 11, 12, 13], "foral": 9, "forecast": 11, "format": [0, 10, 13], "formula": 7, "fpr": 0, "frac": [4, 5, 6, 7, 9, 12], "fraction": 0, "freedom": [0, 6], "frequenc": [0, 6], "frequent": 0, "from": [0, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14], "full": [0, 10], "func": 0, "function": [0, 1, 4, 6, 7, 9, 10, 11, 12, 14], "further": [4, 9], "g": [1, 5], "gabriel": [4, 7], "gener": [0, 4, 6, 8, 9, 10, 12], "generate_data": [0, 2, 4, 6, 9, 12], "generate_miscal_data": 10, "generate_subgroup_data": 10, "generate_wellcal_data": 10, "get_ci": [0, 2], "giger": 9, "git": 10, "github": 10, "give": [4, 5, 7], "given": [0, 8, 9, 11, 13], "glaser": 9, "go": 11, "goe": 5, "good": [0, 5, 6, 7, 11], "granular": 0, "graphic": 0, "greater": 6, "greatli": 7, "grid": 7, "group": [0, 5, 6, 7, 11, 13], "gt": [4, 5], "gu": 9, "gui": [1, 10], "gui_cal_metr": 3, "guid": [1, 4], "gujral": 5, "guo": 5, "h": [0, 5, 6, 9, 10, 13, 14], "ha": [4, 10, 12, 13], "had": 6, "half": 9, "hand": 7, "handl": [0, 14], "hat": [4, 5, 7, 8, 9], "hauskrecht": 5, "have": [0, 1, 4, 6, 7, 8, 9, 10, 11, 14], "have_subgroup": [0, 2, 13], "header": [0, 2, 10], "heavili": 5, 
"help": 10, "helper": 10, "here": [6, 9], "high": 4, "higher": 0, "hist": [6, 12], "histtyp": [6, 12], "hl": [0, 1, 4, 8, 10, 12, 13, 14], "hl_c_p": 6, "hl_c_pvalu": 6, "hl_c_t": 6, "hl_h_p": 6, "hl_h_pvalu": 6, "hl_h_t": 6, "hl_test_valid": 10, "hope": 1, "horsch": 9, "hosmer": [0, 1, 8, 12], "hosmer_lemeshow_test": [0, 2, 6], "hot": 0, "how": [4, 5, 6, 8, 9], "howev": [5, 6, 7, 9, 12], "hsu": 9, "http": [4, 5, 7, 9, 10, 11, 12], "huang": [4, 7], "hyperparamet": [7, 12], "hypothesi": [6, 12], "i": [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], "ibarra": 5, "ici": [0, 1, 4, 8, 9, 10, 13, 14], "idea": 7, "ideal": 6, "ident": 0, "ig": 7, "ignor": 5, "illur": 10, "imag": 10, "imbalanc": 9, "implement": [6, 7], "implicitli": 9, "import": [4, 5, 6, 7, 9, 10, 11, 12, 13, 14], "importlib": 6, "improv": 1, "inch": 0, "includ": [0, 1, 4, 8, 10], "incorrect": 6, "increas": [5, 11], "independ": [0, 9], "index": [0, 1, 8], "indic": [0, 4, 6], "industri": 6, "inform": [0, 11], "informat": [4, 7], "initi": 0, "input": [0, 10, 13], "insid": 10, "inspir": 12, "instabl": 0, "instal": [1, 3], "instead": [0, 6, 10], "int": [0, 9, 11], "int_0": [5, 7], "intecept": 1, "integr": [0, 1, 8], "intellig": [5, 9], "intend": [1, 8], "interact": 4, "intercept": [0, 1, 7, 8, 9, 10, 13, 14], "intercept_ci": 0, "interest": [0, 1, 4, 5, 8, 10, 13, 14], "interestingli": 7, "interfac": [1, 13], "intern": 0, "interpol": 0, "interpret": [5, 6, 7, 14], "interv": [0, 1, 4, 10, 11], "intrins": 9, "intuit": [5, 6], "invers": 0, "is_equal_freq": [0, 5, 6], "iter": 4, "itself": 7, "j": [5, 9, 11, 12, 13], "jamia": [4, 7], "jason": 1, "jerfel": 5, "journal": [4, 7, 12], "just": [5, 6, 13], "k": [5, 7, 9], "kei": [0, 1, 4, 6, 9, 12], "kenett": 4, "keyword": 0, "kira": 9, "kl": [9, 12], "kwarg": 0, "kwok": 1, "kxq045": 9, "l": [4, 5, 6, 7, 9, 11], "label": [0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], "last": 10, "lead": [4, 9, 11], "learn": [1, 5, 8], "left": [6, 9], "legend": [5, 6, 7, 12], "lemesbow": 6, 
"lemeshow": [0, 1, 8, 12], "len": [5, 9, 11], "level": 0, "li": [4, 7], "librari": 11, "like": [0, 4, 7, 12], "likelihood": [4, 9, 11], "limit": 12, "line": [0, 1, 3, 7, 13], "linear": [0, 4, 7], "linear_misc": [0, 2], "linestyl": [6, 12], "list": [0, 4, 6, 10, 12], "literatur": [5, 6], "liu": 9, "ll": 4, "llr": 4, "load": [0, 4, 5, 6, 7, 11], "loader": 10, "loc": [6, 12], "local": [0, 7, 10], "loess": [0, 1, 8, 9, 10, 13, 14], "loess_ici": 7, "loess_ici2": 7, "log": [0, 4, 9], "logical_and": 4, "logist": [0, 4, 6, 7], "logit": [0, 1, 4], "logit_func": [0, 2, 7], "longford": 4, "look": [7, 11], "loop": 13, "loss": [0, 2, 9], "low": [4, 6], "lower": 0, "lowerci": [4, 7, 9, 10, 13, 14], "lowess": 0, "lowess_fit_p": 7, "lowess_fit_p2": 7, "lowess_fit_p2_correct": 7, "lowess_fit_p_correct": 7, "lowess_regression_analysi": [0, 2, 7, 9], "lr": 9, "lt": 5, "lung": 1, "m": [5, 6, 9, 10, 13], "machado": [4, 7], "macheret": [4, 7], "machin": [1, 5, 8], "mai": 5, "main": 7, "make_roc_curv": [0, 2], "manag": 12, "mani": [6, 7, 12, 13], "mannual": 9, "margin": 9, "marker": [7, 9], "mask": [9, 11], "match": 9, "mathbb": [5, 7, 8], "matplolib": 10, "matplotlib": [0, 5, 6, 7, 10, 12], "max": [5, 6, 12], "max_": 5, "max_m": 5, "max_p": 5, "maximum": [0, 1, 4], "mce": [0, 1, 4, 10, 13, 14], "mce_": 8, "mce_c_classon": 5, "mce_c_top_class": 5, "mce_h_classon": 5, "mce_h_top_class": 5, "mean": [0, 4, 5, 6, 7, 8, 9, 11, 12], "meaning": [1, 8], "measur": [0, 4, 5, 7, 12], "medic": [4, 7, 9], "medicin": 7, "messag": 10, "meteorolog": 12, "method": [0, 4, 5, 6, 7, 9, 11], "metric": [1, 2, 4, 5, 6, 7, 9, 10, 12, 13, 14], "metrics_c": 13, "metz": 9, "mi": [9, 11], "might": [9, 11], "min_": 9, "minim": [0, 9], "miscal_dataload": 7, "miscal_funct": 0, "miscal_scal": [0, 10], "miscalibr": [0, 4, 7, 10, 12], "mislead": 11, "mle": 4, "model": [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12], "moder": [4, 8], "modern": 5, "modifi": 5, "modul": 2, "monoton": 9, "mont": 0, "more": [0, 4, 8, 10, 11, 13], 
"most": [5, 7, 8, 14], "much": 5, "multi": [0, 1, 8, 10, 14], "multiclass": [0, 1], "multipl": [0, 6], "must": [0, 10], "n": [4, 5, 7, 9, 10, 12, 13], "n_bootstrap": 10, "n_class": 0, "n_m": 6, "n_point": 0, "n_sampl": 0, "naeini": 5, "name": [0, 12], "nan": [0, 4], "nbin": [0, 10], "ndarrai": 0, "necessari": 11, "need": [0, 3, 4, 6, 7, 8, 10], "network": 5, "neural": 5, "new": [0, 4, 5, 9], "nicegui": [3, 10], "nixon": 5, "non": [6, 7, 12], "none": [0, 5, 11], "nonrobust": 4, "normal": 12, "note": 0, "notebook": [4, 8, 9, 10, 14], "notic": [5, 6, 7, 9, 10], "now": 14, "np": [4, 5, 6, 7, 9, 11, 12, 13], "null": [4, 6, 12], "num_bin": [0, 5, 6, 7, 10, 11, 13, 14], "number": [0, 4, 5, 6, 10], "numer": 0, "numpi": [0, 4, 5, 6, 7, 9, 10, 11, 12, 13], "o_": 6, "object": 0, "observ": [0, 4, 5, 6, 8], "obtain": 5, "oct": 4, "ocz228": [4, 7], "odd": 4, "off": 7, "offer": [1, 6], "often": 6, "ohno": [4, 7], "one": [0, 10, 13], "ones": [9, 11], "onli": [0, 1, 4, 5, 8, 10, 11, 13, 14], "onlin": 4, "oper": 0, "opposit": [5, 6], "optim": [0, 9], "optimal_preval": [0, 9], "optimal_prevalence_adjust": [0, 2], "option": [0, 4, 6, 10], "order": [0, 6, 10, 13], "org": [4, 5, 7, 9, 11, 12], "origin": [4, 10], "other": [0, 4, 5, 6, 7, 8, 9, 11, 12, 13], "otherwis": 0, "outcom": [0, 4, 9], "output": [0, 9, 10], "over": [4, 5, 8], "overal": [4, 10, 13], "overconfid": 11, "p": [0, 1, 4, 5, 7, 8, 9, 10, 13, 14], "p_": [4, 5, 14], "p_i": [7, 9, 12], "p_valu": [0, 12], "packag": [1, 2, 4, 10, 11, 12, 13], "pakdaman": 5, "paramet": 0, "parametr": [6, 7, 12], "part": 5, "particular": 5, "particularli": 1, "pass": 0, "path": [0, 10], "patient": 12, "peopl": 7, "pepe": 9, "per": 0, "percentil": 0, "perfect": 4, "perfectli": 7, "perform": [0, 4, 9, 10, 13], "perform_pervalance_adjust": [0, 9], "perhap": 5, "petrick": 9, "pezeshk": 9, "piegorsch": 4, "pip": [3, 10], "pleas": [1, 8], "pleiss": 5, "plot": [0, 5, 7, 8, 10, 11, 13, 14], "plot_bin": 10, "plot_reliability_diagram": [0, 2, 7, 11, 14], 
"plot_roc_curv": [0, 2], "plote": 7, "plt": [5, 6, 7, 12], "png": 10, "point": 0, "polynomi": 7, "poor": 0, "poorli": 0, "popul": [1, 8, 9, 13], "posit": 0, "posterior": [9, 11], "power": [6, 12], "pp": 4, "practic": 9, "predict": [0, 1, 4, 5, 6, 7, 8, 9, 11, 12, 14], "predictor": 4, "preform": [1, 13], "prepar": 10, "preprocess": 0, "presenc": 0, "present": 0, "preval": [0, 1, 4, 8, 10, 11, 12], "prevalecn": 10, "prevalence_adjust": 10, "previou": 14, "print": [0, 4, 5, 6, 7, 9, 10, 12, 13], "print_result": [0, 4, 7], "pro": [1, 8], "prob": [0, 2, 4, 5, 6, 7, 10, 11, 12, 13, 14], "proba": 13, "proba_0": [0, 10, 13], "proba_1": [0, 10, 13], "proba_adjust": 9, "proba_class1": 7, "proba_n": [10, 13], "probabilist": [0, 5, 6, 12], "probabiliticst": 0, "probabl": [0, 4, 5, 6, 7, 8, 9, 10, 11, 14], "problem": [0, 5, 6, 8, 9, 10, 11, 14], "proce": 11, "proceed": 5, "process": [1, 10], "produc": 9, "prognost": 4, "program": 10, "project": 1, "proper": 12, "proport": 0, "propos": [5, 12], "prove": [6, 9], "provid": [0, 1, 6, 8, 9, 10, 11, 12, 13], "pseudo": 4, "purpos": 10, "py": [3, 10, 13], "pyplot": [5, 6, 7, 12], "python": [1, 3], "q": 5, "qian": 1, "qj": 12, "quantifi": 7, "quarterli": 12, "quick": 1, "quickli": [5, 6], "quickstart": 13, "r": [4, 6, 7, 12], "radiologi": 9, "random": [4, 6, 9, 11, 12], "random_se": 10, "rang": [4, 5, 6, 12], "range_of_bin": 5, "rate": 0, "ratio": [9, 11], "raw": 0, "re": 9, "read": [1, 13], "reader": 5, "real": 13, "rearrang": 9, "reason": [5, 11], "recalibr": 4, "receiv": 0, "recommend": [0, 5, 6, 8, 12], "red": 11, "refer": [1, 8], "regress": [0, 4, 6, 7], "regular": 14, "reject": 6, "relat": [7, 11], "relationship": 9, "reli": [5, 9], "reliability_0": 11, "reliability_1": 11, "reliability_diagram": [0, 2, 4, 5, 6, 7, 11, 12, 14], "reliabl": [0, 1, 5, 6, 7, 8, 10, 12, 13, 14], "relibail": 10, "reload": 6, "remind": 5, "remov": 0, "removing_nan": [0, 2], "replac": [0, 9, 11], "repositori": 10, "reprens": [1, 8], "repres": [0, 1, 8], 
"represent": 5, "requir": [6, 7, 10, 12], "resampl": 0, "research": 9, "residu": [0, 4], "resolut": [0, 9, 12], "respect": 9, "rest": [1, 8, 14], "result": [0, 4, 5, 6, 7, 8, 12, 13], "return": [0, 5, 9], "return_fig": 0, "return_numpi": [0, 4, 5, 6, 12], "reveal": 11, "reweight": 0, "right": [6, 9, 12], "risk": 4, "robust": 7, "roc": 0, "roc_auc": 0, "roc_curv": 0, "rough": 5, "round": [6, 12], "row": 0, "royal": 12, "ruggeri": 4, "rule": 12, "run": [0, 1, 10, 13], "sahin": 9, "sai": 9, "same": [0, 5, 7, 9, 10, 13, 14], "sampl": [0, 5, 6, 7, 10, 11, 14], "sample_s": [0, 4, 6, 12], "samuelson": 9, "save": [0, 10], "save_diagram_output": 10, "save_metr": 10, "save_path": 0, "save_plot": 10, "scale": [0, 9], "scatter": 7, "scatterplot": [0, 7], "scheme": [5, 6], "scipi": [0, 10, 11], "scope": [4, 6], "score": [0, 1, 4, 6, 9, 10, 11, 13, 14], "script": [1, 10], "search": 9, "second": [0, 5, 12], "section": [5, 9, 11], "see": [4, 5, 6, 8, 10, 11, 12, 13, 14], "seed": [4, 6, 9, 12], "select": 0, "self": 0, "sens": [8, 11], "sensit": 0, "separ": 10, "seri": [11, 14], "set": [5, 9, 10, 11], "sever": 6, "shape": 0, "shift": [1, 4, 8, 11, 12], "should": [0, 6, 7, 8, 10, 13], "show": [0, 5, 6, 10, 11, 12], "shown": [4, 5, 6, 12], "signific": [0, 6], "sim": [6, 7], "similar": [5, 6], "similarili": 5, "similarli": [4, 5], "simpl": [5, 6, 13], "simpler": 5, "simpli": [3, 5, 10], "simplic": 5, "simul": [0, 6, 13], "simulated_data_subgroup": [10, 13], "simulated_data_subgroup_result": 10, "simulated_misdata": [7, 10], "simulated_misdata_result": 10, "simulated_welldata": [4, 5, 6, 7, 10, 11, 12, 14], "simulated_welldata_diagram_output": 10, "simulated_welldata_result": 10, "simulation_s": [4, 6, 12], "sinc": [4, 6, 9, 11], "situat": 12, "size": [0, 1, 9, 11], "size_c": 6, "size_h": 6, "skip": 7, "slope": [0, 1, 8], "small": 0, "smith": 11, "smooth": [0, 7], "smoothed_proba": 0, "so": 9, "societi": 12, "softmax": [0, 11], "softmax_to_logit": [0, 2, 11], "sole": 8, "some": [4, 5, 
6, 12, 13], "sort": [0, 7], "sorted_proba": 0, "sourc": 0, "space": [0, 1, 8], "span": [0, 7], "special": 11, "specif": [8, 9, 10], "specifi": [0, 6, 8, 10, 14], "speigelhalt": 12, "spieegelhalt": 1, "spiegelhalt": [0, 1, 8], "spiegelhalter_z_test": [0, 2, 12], "spiegelhalterz": [0, 10, 12, 13, 14], "spline": 7, "sqrt": 12, "squ": 4, "squar": [0, 6, 12], "standard": [6, 12], "start": 1, "stat08078": 4, "statist": [0, 1, 4, 7, 9, 12], "statistician": 12, "statsmodel": 10, "statsref": 4, "std": 4, "step": [0, 6, 12], "steyerberg": [4, 7], "still": [4, 6, 7, 11, 14], "str": [0, 6, 12], "strongli": 8, "structur": 0, "studi": 6, "subgroup": [0, 1, 2, 10], "subgroup_1": [0, 10, 13], "subgroup_1_group_a": [10, 13], "subgroup_1_group_b": [10, 13], "subgroup_2": [0, 10, 13], "subgroup_class": 13, "subgroup_column": 13, "subgroup_indic": [0, 2, 13], "subgroup_m": [10, 13], "subgroups_class": [0, 2, 13], "subgroups_index": [0, 2, 13], "subject": 4, "submodul": 2, "success": 11, "suffici": [1, 8, 12], "suggest": [0, 6, 8], "suit": 1, "sum": 5, "sum_": [5, 6, 7, 9, 12], "summari": [0, 1], "sun": 5, "support": [0, 3, 6], "t": [0, 4, 5, 6, 7, 9, 10, 12], "take": [0, 8], "tannen": 5, "target": 4, "task": 1, "techniqu": 4, "tell": 4, "tempresult": [4, 6, 12], "term": [5, 7, 9, 12], "test": [0, 1, 8, 10, 11], "test_dataload": 11, "text": [5, 6, 7, 9, 12, 14], "th": [5, 6, 7], "than": [4, 6], "thei": [5, 7], "them": [0, 8, 10], "theorem": 9, "theoret": 1, "theori": 6, "therefor": [5, 6, 7, 8, 9], "thi": [0, 1, 4, 5, 6, 9, 10, 13], "thing": 13, "those": 0, "through": [7, 13], "tian": 9, "time": [4, 9], "titl": [0, 5, 6, 7, 11, 12, 14], "tool": [0, 1, 6, 10, 11], "top": [0, 1, 5, 8, 10, 11, 14], "topclass": [5, 9, 10, 13, 14], "total": [5, 6], "tpr": 0, "tradition": 5, "train": 6, "tran": 5, "transform": [0, 8, 9, 10, 14], "transform_topclass": [0, 2, 14], "transformed_data": 14, "treat": [0, 10], "trend": 4, "trial": [11, 12], "true": [0, 1, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14], "truth": 
7, "try": [7, 10], "tune": 12, "tupl": 0, "turn": 10, "tutori": [4, 7, 10], "two": [4, 5, 6, 9, 13], "tygert": 5, "type": [0, 4, 5, 7], "typic": [0, 6], "u": [4, 5, 11], "ubgroup_2": 10, "uitliti": 0, "under": [0, 3, 4, 8, 12], "understand": 6, "uniqu": 0, "up": 5, "upper": [0, 6, 12], "upperci": [4, 7, 9, 10, 13, 14], "us": [0, 1, 3, 4, 5, 6, 8, 9, 11, 13, 14], "usag": 10, "useag": 10, "user": [4, 14], "usual": [4, 6, 9], "util": [2, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14], "v": [1, 4, 5, 8, 10, 14], "v29i1": 5, "valid": [6, 10], "valu": [0, 1, 4, 10, 11, 13, 14], "var": 12, "variabl": 4, "varianc": 12, "variou": [0, 1], "verbos": [10, 13], "veri": [6, 11, 12], "versa": 4, "version": [0, 5], "vi": [2, 7, 11, 14], "vice": 4, "visual": [0, 1, 8, 10, 11], "vline": [6, 12], "vstack": 11, "w": [4, 5, 6, 7, 9], "wa": 12, "waf993": 11, "wai": [4, 5, 6, 7, 13, 14], "want": [5, 10], "warn": 4, "we": [1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], "weak": 4, "weather": 11, "wed": 4, "weight": [0, 5, 7], "weinberg": 5, "well": [0, 4, 5, 6, 7, 10, 11, 12, 14], "wellcal_dataload": [4, 5, 6, 7, 11, 12], "when": [0, 5, 6, 8, 9, 10, 14], "where": [0, 5, 6, 7, 9, 10, 11, 12, 13], "whether": [0, 4, 6, 8, 9, 10, 12, 13, 14], "which": [0, 4, 5, 7, 8, 9, 10, 11, 12], "while": [4, 5, 6, 7], "who": 12, "whole": 10, "wide": [0, 5, 6], "width": [5, 6, 7], "wilei": 4, "wilson": [0, 11], "window": 7, "within": [0, 9, 13], "without": 0, "word": 9, "work": [1, 6, 8, 10, 14], "workstat": 9, "world": 13, "wrong": 6, "x": [0, 4, 6, 7, 9, 12], "x1": 4, "x_1": 9, "x_2": 9, "x_i": 12, "xlabel": [5, 6, 7, 12], "xu": 5, "y": [0, 4, 5, 6, 7, 8, 9, 12], "y_": 5, "y_i": 9, "y_predict": 0, "y_proba": 0, "y_true": 0, "ylabel": [6, 7, 12], "ylim": 5, "ymax": [6, 12], "ymin": [6, 12], "you": [1, 3, 4, 7, 8, 10, 13, 14], "your": [1, 10], "z": [0, 1, 4, 8, 9], "z_score": [0, 12], "zero": 5, "zhang": 5}, "titles": ["calzone package", "Welcome to the documentation for calzone", "calzone", "Running GUI", "COX calibration 
analysis", "Exepected Calibration Error(ECE) and Maximum Calibration Error (MCE)", "Hosmer-Lemeshow test (HL test)", "Integrated Calibration Index (ICI)", "Summary and guide for calzone", "Prevalence adjustment", "Quick Start", "Reliability diagram", "Spiegelhalter\u2019s Z-test", "Subgroup analysis", "Multiclass extension"], "titleterms": {"": 12, "adjust": 9, "analysi": [4, 13], "background": [4, 5, 6, 7, 12], "bin": 5, "calcul": [4, 5, 6, 7, 12], "calibr": [4, 5, 7, 8], "calzon": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 12], "class": 9, "command": 10, "con": [4, 5, 6, 7, 12], "constant": 9, "content": [0, 1], "cox": [4, 7], "curv": 7, "diagram": 11, "document": 1, "ec": 5, "error": 5, "estim": 5, "exepect": 5, "extens": 14, "fit": 7, "function": 5, "gui": 3, "guid": 8, "hl": 6, "hosmer": 6, "ici": 7, "index": 7, "instal": 10, "intecept": 4, "integr": 7, "intercept": 4, "interest": 9, "interfac": 10, "lemeshow": 6, "line": 10, "loess": 7, "logit": 9, "maximum": 5, "mce": 5, "metric": [0, 8], "modul": 0, "multiclass": 14, "p": [6, 12], "packag": 0, "preform": 9, "preval": 9, "pro": [4, 5, 6, 7, 12], "python": 10, "quick": 10, "refer": [4, 5, 6, 7, 9, 11, 12], "reliabl": 11, "run": 3, "score": 12, "shift": 9, "size": [4, 5, 6, 12], "slope": 4, "spieegelhalt": 12, "spiegelhalt": 12, "start": 10, "statist": 6, "subgroup": 13, "submodul": 0, "summari": 8, "test": [4, 6, 12], "theoret": [4, 5, 6, 7, 12], "us": [7, 10, 12], "util": 0, "valu": [6, 12], "vi": 0, "visual": 7, "welcom": 1, "z": 12}})
\ No newline at end of file
diff --git a/docs/build/latex/calzone.aux b/docs/build/latex/calzone.aux
index b9bb92e..28f0d4a 100644
--- a/docs/build/latex/calzone.aux
+++ b/docs/build/latex/calzone.aux
@@ -78,8 +78,8 @@
 \newlabel{notebooks/hl_test:Calculating-HL-test-statistics-and-p-value-with-calzone}{{5.4}{30}{Calculating HL test statistics and p\sphinxhyphen {}value with calzone}{section.5.4}{}}
 \@writefile{toc}{\contentsline {section}{\numberline {5.5}Size of HL test}{31}{section.5.5}\protected@file@percent }
 \newlabel{notebooks/hl_test:Size-of-HL-test}{{5.5}{31}{Size of HL test}{section.5.5}{}}
-\@writefile{toc}{\contentsline {section}{\numberline {5.6}Reference}{33}{section.5.6}\protected@file@percent }
-\newlabel{notebooks/hl_test:Reference}{{5.6}{33}{Reference}{section.5.6}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {5.6}Reference}{34}{section.5.6}\protected@file@percent }
+\newlabel{notebooks/hl_test:Reference}{{5.6}{34}{Reference}{section.5.6}{}}
 \@writefile{toc}{\contentsline {chapter}{\numberline {6}COX calibration analysis}{35}{chapter.6}\protected@file@percent }
 \@writefile{lof}{\addvspace {10\p@ }}
 \@writefile{lot}{\addvspace {10\p@ }}
diff --git a/docs/build/latex/calzone.fdb_latexmk b/docs/build/latex/calzone.fdb_latexmk
index b0d2628..61d23aa 100644
--- a/docs/build/latex/calzone.fdb_latexmk
+++ b/docs/build/latex/calzone.fdb_latexmk
@@ -1,10 +1,10 @@
 # Fdb version 3
-["makeindex calzone.idx"] 1729800059 "calzone.idx" "calzone.ind" "calzone" 1729800063
-  "calzone.idx" 1729800993 7556 580e7a1735725e6c27bdde56ecb685cf "pdflatex"
+["makeindex calzone.idx"] 1729867374 "calzone.idx" "calzone.ind" "calzone" 1729867378
+  "calzone.idx" 1729868310 7556 580e7a1735725e6c27bdde56ecb685cf "pdflatex"
   (generated)
   "calzone.ilg"
   "calzone.ind"
-["pdflatex"] 1729800061 "calzone.tex" "calzone.pdf" "calzone" 1729800063
+["pdflatex"] 1729867376 "calzone.tex" "calzone.pdf" "calzone" 1729867378
   "/etc/texmf/web2c/texmf.cnf" 1728063259 475 c0e671620eb5563b2130f56340a5fde8 ""
   "/home/kwoklung.fan/.texlive2019/texmf-var/web2c/pdftex/pdflatex.fmt" 1728065444 8258469 f9aaee64b5629d9cdd5c82d0cd0a36eb ""
   "/usr/share/texlive/texmf-dist/fonts/enc/dvips/base/8r.enc" 1165713224 4850 80dc9bab7f31fb78a000ccfed0e27cab ""
@@ -174,30 +174,30 @@
   "/usr/share/texmf/tex/latex/tex-gyre/tgtermes.sty" 1480098840 2211 af9b7d12507105a58a3e8e926996b827 ""
   "/usr/share/texmf/tex/latex/tex-gyre/ts1qtm.fd" 1480098840 1160 de7b1cf70edab73c9f1704df2a9fdbbd ""
   "/usr/share/texmf/web2c/texmf.cnf" 1581979058 38841 ce3692aa899bb693b90b87eaa5d4d84e ""
-  "calzone.aux" 1729800993 22211 dc2a842f7511e3ec24a7845d9d01e916 "pdflatex"
-  "calzone.ind" 1729800990 5374 7c24d43a4a21b13b2d7b0902a9618e79 "makeindex calzone.idx"
-  "calzone.out" 1729800993 11017 0b3b85c9fad9148defdef8b454953f20 "pdflatex"
-  "calzone.tex" 1729800988 261294 9696b4985a36ade6381572427a5900f8 ""
-  "calzone.toc" 1729800993 5237 8190a61a28239a1bb5b1869d1429b3bd "pdflatex"
+  "calzone.aux" 1729868310 22211 a8df3ff9bcaeab7e5f0b34c91e7fe470 "pdflatex"
+  "calzone.ind" 1729868306 5374 7c24d43a4a21b13b2d7b0902a9618e79 "makeindex calzone.idx"
+  "calzone.out" 1729868310 11017 0b3b85c9fad9148defdef8b454953f20 "pdflatex"
+  "calzone.tex" 1729868304 266768 dd59b96c0105007a038c42b1201f1929 ""
+  "calzone.toc" 1729868310 5237 3e608b94f160c2ce9829659ab9a4fa39 "pdflatex"
   "mytable.png" 1727877915 716184 0235eb86d9c3d9d8be3cd26a9ca00db9 ""
   "nbsphinx.sty" 1723612157 8202 a429e7504022e861d9f81e9a64e9928d ""
-  "notebooks_ece_mce_12_1.png" 1729800984 55390 1a16cfda687f702d53b1653f869e18ca ""
-  "notebooks_hl_test_13_1.png" 1729800985 21799 b95978db8e70a26596193ce74b909cd8 ""
-  "notebooks_hl_test_14_1.png" 1729800985 21843 d147f5f8c65154f77466ca9f2e92235a ""
-  "notebooks_ici_8_1.png" 1729800985 43328 2354d7edb62b8a1f369fcba69d425870 ""
-  "notebooks_quickstart_16_1.png" 1729800986 39928 4117abd19af3b57bc8e66a51a57c56f8 ""
-  "notebooks_quickstart_18_1.png" 1729800986 42921 ee3df6891a29f399f164d5e55f05e6ec ""
-  "notebooks_quickstart_20_1.png" 1729800986 42127 7de1ea85f325862a865034006d735614 ""
-  "notebooks_quickstart_20_3.png" 1729800986 42854 005a6f616efd03d978c3cb107a73a066 ""
-  "notebooks_quickstart_20_5.png" 1729800986 44619 432e67878b412c5e06e1dcc821ed903c ""
-  "notebooks_reliability_diagram_3_0.png" 1729800986 40555 7f21493f44d7ea51f3eb7d2e560d4f0b ""
-  "notebooks_reliability_diagram_5_0.png" 1729800986 47576 38437d3c4064586dc155be2ef6c8735b ""
-  "notebooks_reliability_diagram_8_0.png" 1729800986 40689 0cb4ca8d3c4b37c78e0a871a292c915f ""
-  "notebooks_reliability_diagram_9_0.png" 1729800986 60864 f866db6032833e6af5b14aa39a869dde ""
-  "notebooks_spiegelhalter_z_9_1.png" 1729800986 22199 c90489bd51b52e777b195f116cce338b ""
-  "notebooks_topclass_2_0.png" 1729800987 40555 7f21493f44d7ea51f3eb7d2e560d4f0b ""
+  "notebooks_ece_mce_12_1.png" 1729868301 55390 1a16cfda687f702d53b1653f869e18ca ""
+  "notebooks_hl_test_13_1.png" 1729868301 21799 b95978db8e70a26596193ce74b909cd8 ""
+  "notebooks_hl_test_14_1.png" 1729868301 21843 d147f5f8c65154f77466ca9f2e92235a ""
+  "notebooks_ici_8_1.png" 1729868302 43328 2354d7edb62b8a1f369fcba69d425870 ""
+  "notebooks_quickstart_16_1.png" 1729868302 39928 4117abd19af3b57bc8e66a51a57c56f8 ""
+  "notebooks_quickstart_18_1.png" 1729868302 42921 ee3df6891a29f399f164d5e55f05e6ec ""
+  "notebooks_quickstart_20_1.png" 1729868302 42127 7de1ea85f325862a865034006d735614 ""
+  "notebooks_quickstart_20_3.png" 1729868302 42854 005a6f616efd03d978c3cb107a73a066 ""
+  "notebooks_quickstart_20_5.png" 1729868302 44619 432e67878b412c5e06e1dcc821ed903c ""
+  "notebooks_reliability_diagram_3_0.png" 1729868302 40555 7f21493f44d7ea51f3eb7d2e560d4f0b ""
+  "notebooks_reliability_diagram_5_0.png" 1729868302 47576 38437d3c4064586dc155be2ef6c8735b ""
+  "notebooks_reliability_diagram_8_0.png" 1729868302 40689 0cb4ca8d3c4b37c78e0a871a292c915f ""
+  "notebooks_reliability_diagram_9_0.png" 1729868302 60864 f866db6032833e6af5b14aa39a869dde ""
+  "notebooks_spiegelhalter_z_9_1.png" 1729868303 22199 c90489bd51b52e777b195f116cce338b ""
+  "notebooks_topclass_2_0.png" 1729868303 40555 7f21493f44d7ea51f3eb7d2e560d4f0b ""
   "sphinx.sty" 1727458774 50659 6d393be3f369a7862f0b19a359f1ab89 ""
-  "sphinxhighlight.sty" 1729800987 7553 83fb52292c17957d9f4aadcb28c57a87 ""
+  "sphinxhighlight.sty" 1729868303 7553 83fb52292c17957d9f4aadcb28c57a87 ""
   "sphinxlatexadmonitions.sty" 1727458774 18222 f3bfd316b630ed188fcc20bf320acafe ""
   "sphinxlatexcontainers.sty" 1727458774 901 d3a3a1b7b2547f47ade2499350b5c420 ""
   "sphinxlatexgraphics.sty" 1727458774 4840 a9578332b6f3b35e198751fb632c9b79 ""
@@ -212,15 +212,15 @@
   "sphinxlatexstyletext.sty" 1727458774 6881 543f3cecccc7dccac396b5720cccf443 ""
   "sphinxlatextables.sty" 1727458774 57644 2253ce149b29042948a000d2dbf50b50 ""
   "sphinxmanual.cls" 1727458774 4241 7b0d7a37df7b5715fb0dbd585c52ecdb ""
-  "sphinxmessages.sty" 1729800988 745 3f5fcd6cdd7964ed608767954a8ced6f ""
+  "sphinxmessages.sty" 1729868304 745 3f5fcd6cdd7964ed608767954a8ced6f ""
   "sphinxoptionsgeometry.sty" 1727458774 2061 47bb34b8ed8a78823eb0c886abfb9f4d ""
   "sphinxoptionshyperref.sty" 1727458774 1094 79beb8b8a3f10784f8cce804e0f9d3aa ""
   "sphinxpackageboxes.sty" 1727458774 36106 1be2053eb1cb9b083b3a75e3657bcb24 ""
   "sphinxpackagefootnote.sty" 1727458774 15330 2fb656b6ce8cd1f6aba2d1c508fb51e5 ""
   (generated)
+  "calzone.pdf"
   "calzone.idx"
-  "calzone.log"
   "calzone.out"
-  "calzone.aux"
   "calzone.toc"
-  "calzone.pdf"
+  "calzone.aux"
+  "calzone.log"
diff --git a/docs/build/latex/calzone.log b/docs/build/latex/calzone.log
index 085c9f1..120a963 100644
--- a/docs/build/latex/calzone.log
+++ b/docs/build/latex/calzone.log
@@ -1,4 +1,4 @@
-This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=pdflatex 2024.10.4)  24 OCT 2024 16:01
+This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=pdflatex 2024.10.4)  25 OCT 2024 10:42
 entering extended mode
  restricted \write18 enabled.
  %&-line parsing enabled.
@@ -893,67 +893,67 @@ LaTeX Font Info:    Font shape `T1/txtt/b/n' in size <10> not available
 (Font)              Font shape `T1/txtt/bx/n' tried instead on input line 237.
  [4]
 LaTeX Font Info:    Trying to load font information for TS1+txtt on input line 
-329.
+334.
 
 (/usr/share/texlive/texmf-dist/tex/latex/txfonts/ts1txtt.fd
 File: ts1txtt.fd 2000/12/15 v3.1
 )
-Underfull \vbox (badness 2343) detected at line 383
+Underfull \vbox (badness 2343) detected at line 389
  []
 
 [5]
 <notebooks_quickstart_16_1.png, id=354, 507.3354pt x 383.031pt>
 File: notebooks_quickstart_16_1.png Graphic file (type png)
 <use notebooks_quickstart_16_1.png>
-Package pdftex.def Info: notebooks_quickstart_16_1.png  used on input line 403.
+Package pdftex.def Info: notebooks_quickstart_16_1.png  used on input line 409.
 
 (pdftex.def)             Requested size: 469.77333pt x 354.69556pt.
  [6 <./notebooks_quickstart_16_1.png>]
 <notebooks_quickstart_18_1.png, id=361, 507.3354pt x 383.031pt>
 File: notebooks_quickstart_18_1.png Graphic file (type png)
 <use notebooks_quickstart_18_1.png>
-Package pdftex.def Info: notebooks_quickstart_18_1.png  used on input line 489.
+Package pdftex.def Info: notebooks_quickstart_18_1.png  used on input line 495.
 
 (pdftex.def)             Requested size: 469.77333pt x 354.69556pt.
 
-Underfull \vbox (badness 10000) detected at line 493
+Underfull \vbox (badness 10000) detected at line 499
  []
 
 [7]
-Overfull \vbox (0.79068pt too high) detected at line 555
+Overfull \vbox (0.79068pt too high) detected at line 561
  []
 
 [8 <./notebooks_quickstart_18_1.png>]
 <notebooks_quickstart_20_1.png, id=371, 507.3354pt x 383.031pt>
 File: notebooks_quickstart_20_1.png Graphic file (type png)
 <use notebooks_quickstart_20_1.png>
-Package pdftex.def Info: notebooks_quickstart_20_1.png  used on input line 574.
+Package pdftex.def Info: notebooks_quickstart_20_1.png  used on input line 580.
 
 (pdftex.def)             Requested size: 469.77333pt x 354.69556pt.
 
-Underfull \vbox (badness 10000) detected at line 617
+Underfull \vbox (badness 10000) detected at line 623
  []
 
 [9 <./notebooks_quickstart_20_1.png>]
 <notebooks_quickstart_20_3.png, id=378, 507.3354pt x 393.8715pt>
 File: notebooks_quickstart_20_3.png Graphic file (type png)
 <use notebooks_quickstart_20_3.png>
-Package pdftex.def Info: notebooks_quickstart_20_3.png  used on input line 636.
+Package pdftex.def Info: notebooks_quickstart_20_3.png  used on input line 642.
 
 (pdftex.def)             Requested size: 469.77333pt x 364.72809pt.
 
-Underfull \vbox (badness 10000) detected at line 679
+Underfull \vbox (badness 10000) detected at line 685
  []
 
 [10 <./notebooks_quickstart_20_3.png>]
 <notebooks_quickstart_20_5.png, id=384, 507.3354pt x 393.8715pt>
 File: notebooks_quickstart_20_5.png Graphic file (type png)
 <use notebooks_quickstart_20_5.png>
-Package pdftex.def Info: notebooks_quickstart_20_5.png  used on input line 699.
+Package pdftex.def Info: notebooks_quickstart_20_5.png  used on input line 705.
 
 (pdftex.def)             Requested size: 469.77333pt x 364.72809pt.
 
-Underfull \vbox (badness 10000) detected at line 703
+Underfull \vbox (badness 10000) detected at line 709
  []
 
 [11] [12 <./notebooks_quickstart_20_5.png>] [13] [14
@@ -963,7 +963,7 @@ Chapter 2.
 <mytable.png, id=402, 1344.222pt x 337.26pt>
 File: mytable.png Graphic file (type png)
 <use mytable.png>
-Package pdftex.def Info: mytable.png  used on input line 781.
+Package pdftex.def Info: mytable.png  used on input line 787.
 (pdftex.def)             Requested size: 469.74635pt x 117.84206pt.
 [15 <./mytable.png>] [16
 
@@ -973,10 +973,10 @@ Chapter 3.
 File: notebooks_reliability_diagram_3_0.png Graphic file (type png)
 <use notebooks_reliability_diagram_3_0.png>
 Package pdftex.def Info: notebooks_reliability_diagram_3_0.png  used on input l
-ine 850.
+ine 856.
 (pdftex.def)             Requested size: 469.77333pt x 364.72809pt.
 
-Underfull \vbox (badness 10000) detected at line 854
+Underfull \vbox (badness 10000) detected at line 860
  []
 
 [17]
@@ -984,14 +984,14 @@ Underfull \vbox (badness 10000) detected at line 854
 File: notebooks_reliability_diagram_5_0.png Graphic file (type png)
 <use notebooks_reliability_diagram_5_0.png>
 Package pdftex.def Info: notebooks_reliability_diagram_5_0.png  used on input l
-ine 887.
+ine 893.
 (pdftex.def)             Requested size: 469.77333pt x 364.72809pt.
 
-Underfull \vbox (badness 10000) detected at line 891
+Underfull \vbox (badness 10000) detected at line 897
  []
 
 [18 <./notebooks_reliability_diagram_3_0.png>]
-Underfull \vbox (badness 3240) detected at line 923
+Underfull \vbox (badness 3240) detected at line 929
  []
 
 [19 <./notebooks_reliability_diagram_5_0.png>]
@@ -999,10 +999,10 @@ Underfull \vbox (badness 3240) detected at line 923
 File: notebooks_reliability_diagram_8_0.png Graphic file (type png)
 <use notebooks_reliability_diagram_8_0.png>
 Package pdftex.def Info: notebooks_reliability_diagram_8_0.png  used on input l
-ine 950.
+ine 956.
 (pdftex.def)             Requested size: 469.77333pt x 364.72809pt.
 
-Underfull \vbox (badness 10000) detected at line 969
+Underfull \vbox (badness 10000) detected at line 975
  []
 
 [20 <./notebooks_reliability_diagram_8_0.png>]
@@ -1010,7 +1010,7 @@ Underfull \vbox (badness 10000) detected at line 969
 File: notebooks_reliability_diagram_9_0.png Graphic file (type png)
 <use notebooks_reliability_diagram_9_0.png>
 Package pdftex.def Info: notebooks_reliability_diagram_9_0.png  used on input l
-ine 984.
+ine 990.
 (pdftex.def)             Requested size: 469.77333pt x 364.72809pt.
  [21 <./notebooks_reliability_diagram_9_0.png>] [22
 
@@ -1020,7 +1020,7 @@ Chapter 4.
 <notebooks_ece_mce_12_1.png, id=466, 401.8212pt x 327.3831pt>
 File: notebooks_ece_mce_12_1.png Graphic file (type png)
 <use notebooks_ece_mce_12_1.png>
-Package pdftex.def Info: notebooks_ece_mce_12_1.png  used on input line 1310.
+Package pdftex.def Info: notebooks_ece_mce_12_1.png  used on input line 1316.
 (pdftex.def)             Requested size: 418.56474pt x 341.02489pt.
  [26 <./notebooks_ece_mce_12_1.png>] [27] [28
 
@@ -1030,23 +1030,26 @@ Chapter 5.
 <notebooks_hl_test_13_1.png, id=498, 409.7709pt x 327.3831pt>
 File: notebooks_hl_test_13_1.png Graphic file (type png)
 <use notebooks_hl_test_13_1.png>
-Package pdftex.def Info: notebooks_hl_test_13_1.png  used on input line 1626.
+Package pdftex.def Info: notebooks_hl_test_13_1.png  used on input line 1632.
 (pdftex.def)             Requested size: 426.85823pt x 341.02489pt.
 <notebooks_hl_test_14_1.png, id=499, 409.7709pt x 327.3831pt>
 File: notebooks_hl_test_14_1.png Graphic file (type png)
 <use notebooks_hl_test_14_1.png>
-Package pdftex.def Info: notebooks_hl_test_14_1.png  used on input line 1681.
+Package pdftex.def Info: notebooks_hl_test_14_1.png  used on input line 1687.
 (pdftex.def)             Requested size: 426.85823pt x 341.02489pt.
 
-Underfull \vbox (badness 10000) detected at line 1685
+Underfull \vbox (badness 10000) detected at line 1691
  []
 
-[32 <./notebooks_hl_test_13_1.png>] [33 <./notebooks_hl_test_14_1.png>]
-[34
+[32 <./notebooks_hl_test_13_1.png>]
+Underfull \vbox (badness 2932) detected at line 1731
+ []
 
-]
+[33 <./notebooks_hl_test_14_1.png>] [34]
 Chapter 6.
-[35] [36] [37] [38] [39] [40
+[35
+
+] [36] [37] [38] [39] [40
 
 ]
 Chapter 7.
@@ -1054,7 +1057,7 @@ Chapter 7.
 <notebooks_ici_8_1.png, id=555, 409.7709pt x 327.3831pt>
 File: notebooks_ici_8_1.png Graphic file (type png)
 <use notebooks_ici_8_1.png>
-Package pdftex.def Info: notebooks_ici_8_1.png  used on input line 2345.
+Package pdftex.def Info: notebooks_ici_8_1.png  used on input line 2414.
 (pdftex.def)             Requested size: 426.85823pt x 341.02489pt.
  [44 <./notebooks_ici_8_1.png>]
 Chapter 8.
@@ -1065,10 +1068,10 @@ Chapter 8.
 File: notebooks_spiegelhalter_z_9_1.png Graphic file (type png)
 <use notebooks_spiegelhalter_z_9_1.png>
 Package pdftex.def Info: notebooks_spiegelhalter_z_9_1.png  used on input line 
-2615.
+2684.
 (pdftex.def)             Requested size: 426.85823pt x 341.02489pt.
 
-Underfull \vbox (badness 10000) detected at line 2619
+Underfull \vbox (badness 10000) detected at line 2688
  []
 
 [47] [48 <./notebooks_spiegelhalter_z_9_1.png>]
@@ -1078,17 +1081,17 @@ Chapter 9.
 ] [50] [51] [52]
 Chapter 10.
 
-Overfull \vbox (0.92192pt too high) detected at line 3045
+Overfull \vbox (0.92192pt too high) detected at line 3114
  []
 
 [53
 
 ] [54]
-Underfull \vbox (badness 10000) detected at line 3153
+Underfull \vbox (badness 10000) detected at line 3222
  []
 
 
-Overfull \vbox (3.52196pt too high) detected at line 3153
+Overfull \vbox (3.52196pt too high) detected at line 3222
  []
 
 [55] [56]
@@ -1096,16 +1099,16 @@ Chapter 11.
 <notebooks_topclass_2_0.png, id=621, 507.3354pt x 393.8715pt>
 File: notebooks_topclass_2_0.png Graphic file (type png)
 <use notebooks_topclass_2_0.png>
-Package pdftex.def Info: notebooks_topclass_2_0.png  used on input line 3221.
+Package pdftex.def Info: notebooks_topclass_2_0.png  used on input line 3290.
 (pdftex.def)             Requested size: 469.77333pt x 364.72809pt.
 
-Underfull \vbox (badness 10000) detected at line 3225
+Underfull \vbox (badness 10000) detected at line 3294
  []
 
 [57
 
 ]
-Underfull \vbox (badness 10000) detected at line 3273
+Underfull \vbox (badness 10000) detected at line 3342
  []
 
 [58 <./notebooks_topclass_2_0.png>] [59] [60
@@ -1121,14 +1124,14 @@ LaTeX Font Info:    Font shape `TS1/txtt/m/it' in size <10> not available
 (Font)              Font shape `TS1/txtt/m/sl' tried instead on input line 1.
  [64]
 LaTeX Font Info:    Trying to load font information for U+fontawesomefree1 on i
-nput line 3593.
+nput line 3662.
 
 (/usr/share/texlive/texmf-dist/tex/latex/fontawesome5/ufontawesomefree1.fd)
-Overfull \vbox (0.52995pt too high) detected at line 3656
+Overfull \vbox (0.52995pt too high) detected at line 3725
  []
 
 [65] [66] [67] [68] [69] [70]
-Overfull \hbox (73.01103pt too wide) in paragraph at lines 4376--4380
+Overfull \hbox (73.01103pt too wide) in paragraph at lines 4445--4449
 []\T1/qtm/m/n/10 Note: - If there is a header, it must be in the for-mat: proba
 _0,proba_1,...,subgroup_1(optional),subgroup_2(optional),...,label
  []
@@ -1256,15 +1259,15 @@ Underfull \hbox (badness 10000) in paragraph at lines 116--118
 [80
 
 ])
-Package atveryend Info: Empty hook `BeforeClearDocument' on input line 5024.
-Package atveryend Info: Empty hook `AfterLastShipout' on input line 5024.
+Package atveryend Info: Empty hook `BeforeClearDocument' on input line 5093.
+Package atveryend Info: Empty hook `AfterLastShipout' on input line 5093.
  (./calzone.aux)
-Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 5024.
-Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 5024.
+Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 5093.
+Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 5093.
 
 Package rerunfilecheck Info: File `calzone.out' has not changed.
 (rerunfilecheck)             Checksum: 0B3B85C9FAD9148DEFDEF8B454953F20;11017.
-Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 5024.
+Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 5093.
  ) 
 Here is how much of TeX's memory you used:
  18988 strings out of 481239
@@ -1297,7 +1300,7 @@ fonts/type1/public/tex-gyre/qtmri.pfb></usr/share/texlive/texmf-dist/fonts/type
 xfonts/t1xtt.pfb></usr/share/texlive/texmf-dist/fonts/type1/public/txfonts/t1xt
 t.pfb></usr/share/texlive/texmf-dist/fonts/type1/public/txfonts/tcxtt.pfb></usr
 /share/texlive/texmf-dist/fonts/type1/urw/times/utmr8a.pfb>
-Output written on calzone.pdf (84 pages, 1435928 bytes).
+Output written on calzone.pdf (84 pages, 1437732 bytes).
 PDF statistics:
  953 PDF objects out of 1000 (max. 8388607)
  799 compressed objects within 8 object streams
diff --git a/docs/build/latex/calzone.pdf b/docs/build/latex/calzone.pdf
index f5988f7..4f4beee 100644
Binary files a/docs/build/latex/calzone.pdf and b/docs/build/latex/calzone.pdf differ
diff --git a/docs/build/latex/calzone.tex b/docs/build/latex/calzone.tex
index 366fe90..9851a2e 100644
--- a/docs/build/latex/calzone.tex
+++ b/docs/build/latex/calzone.tex
@@ -65,7 +65,7 @@
 
 
 \title{calzone}
-\date{Oct 24, 2024}
+\date{Oct 25, 2024}
 \release{develop}
 \author{Kwok Lung "Jason" Fan, Qian Cao}
 \newcommand{\sphinxlogo}{\vbox{}}
@@ -274,7 +274,8 @@ \section{Command line interface}
                       [--prevalence\_adjustment] [--n\_bootstrap N\_BOOTSTRAP]
                       [--bootstrap\_ci BOOTSTRAP\_CI]
                       [--class\_to\_calculate CLASS\_TO\_CALCULATE]
-                      [--num\_bins NUM\_BINS] [--topclass]
+                      [--num\_bins NUM\_BINS]
+                      [--hl\_test\_validation HL\_TEST\_VALIDATION] [--topclass]
                       [--save\_metrics SAVE\_METRICS] [--plot]
                       [--plot\_bins PLOT\_BINS] [--save\_plot SAVE\_PLOT]
                       [--save\_diagram\_output SAVE\_DIAGRAM\_OUTPUT] [--verbose]
@@ -301,6 +302,9 @@ \section{Command line interface}
                         Class to calculate metrics for (default: 1)
   --num\_bins NUM\_BINS   Number of bins for ECE/MCE/HL calculations (default:
                         10)
+  --hl\_test\_validation HL\_TEST\_VALIDATION
+                        Using nbin instead of nbin-2 as HL test DOF. Use it if
+                        the dataset is validation set.
   --topclass            Whether to transform the problem to top-class problem.
   --save\_metrics SAVE\_METRICS
                         Save the metrics to a csv file
@@ -308,7 +312,8 @@ \section{Command line interface}
   --plot\_bins PLOT\_BINS
                         Number of bins for reliability diagram
   --save\_plot SAVE\_PLOT
-                        Save the plot to a file
+                        Save the plot to a file. Must end with valid image
+                        formats.
   --save\_diagram\_output SAVE\_DIAGRAM\_OUTPUT
                         Save the reliability diagram output to a file
   --verbose             Print verbose output
@@ -340,6 +345,7 @@ \section{Command line interface}
 \PYG{o}{\PYGZhy{}}\PYG{o}{\PYGZhy{}}\PYG{n}{save\PYGZus{}diagram\PYGZus{}output} \PYG{l+s+s1}{\PYGZsq{}}\PYG{l+s+s1}{../../../example\PYGZus{}data/simulated\PYGZus{}welldata\PYGZus{}diagram\PYGZus{}output.csv}\PYG{l+s+s1}{\PYGZsq{}}
 \PYG{c+c1}{\PYGZsh{}\PYGZsh{}\PYGZsh{} save\PYGZus{}diagram\PYGZus{}output only when you want to save the reliability diagram output}
 \PYG{c+c1}{\PYGZsh{}\PYGZhy{}\PYGZhy{}prevalence\PYGZus{}adjustment \PYGZsh{} only when you want to apply prevalence adjustment}
+\PYG{c+c1}{\PYGZsh{}\PYGZhy{}\PYGZhy{}hl\PYGZus{}test\PYGZus{}validation \PYGZsh{}use it only when the data is from validation set}
 \end{sphinxVerbatim}
 }
 
@@ -1352,7 +1358,7 @@ \section{Theoretical Background}
 \end{equation*}
 \sphinxAtStartPar
 where \(E_{1,m}\) is the expected number of class 1 events in the \(\text{m}^{th}\) bin, \(O_{1,m}\) is the observed number of class 1 events in the \(\text{m}^{th}\) bin, \(N_m\) is the total number of observations in the \(\text{m}^{th}\) bin, and \(M\) is the number of bins. The HL test statistic is distributed as a chi\sphinxhyphen{}squared distribution with \(M-2\) degrees of freedom. We can then use this test statistic to calculate the p\sphinxhyphen{}value for the test and determine
-whether we can reject the null hypothesis that the model is well\sphinxhyphen{}calibrated. (Notice that the degree of freedom of HL test is \(M-2\) by default but some literature suggests that the degree of freedom should be \(M\) instead when the samples is not used for training. We could not find a proof to this statement and we provides the option to specify the degree of freedom in the \sphinxcode{\sphinxupquote{calzone}}. The default value is still \(M-2\).)
+whether we can reject the null hypothesis that the model is well\sphinxhyphen{}calibrated. Notice that the number of degrees of freedom of the HL test is \(M-2\) by default, but some literature suggests that it should be \(M\) instead when the samples are not used for training. We provide the option to specify the degrees of freedom in \sphinxcode{\sphinxupquote{calzone}}. The default value is still \(M-2\).
 
 
 \section{Pros of HL test}
@@ -1687,8 +1693,71 @@ \section{Size of HL test}
 \end{sphinxuseclass}
 \end{sphinxuseclass}
 \sphinxAtStartPar
-We can see that both the equal\sphinxhyphen{}width and the equal\sphinxhyphen{}count method have the incorrect size.
+We can see that both the equal\sphinxhyphen{}width and the equal\sphinxhyphen{}count methods have the incorrect size. This is consistent with the claim that the number of degrees of freedom should be M instead of M\sphinxhyphen{}2, which we can show with the simulation below. We do not prove the claim here since it is beyond the scope of this documentation.
 
+\begin{sphinxuseclass}{nbinput}
+{
+\begin{sphinxVerbatim}[commandchars=\\\{\}]
+\llap{\color{nbsphinxin}[6]:\,\hspace{\fboxrule}\hspace{\fboxsep}}\PYG{c+c1}{\PYGZsh{}\PYGZsh{}\PYGZsh{} The size of HL Test}
+\PYG{k+kn}{from} \PYG{n+nn}{calzone}\PYG{n+nn}{.}\PYG{n+nn}{utils} \PYG{k+kn}{import} \PYG{n}{fake\PYGZus{}binary\PYGZus{}data\PYGZus{}generator}
+\PYG{k+kn}{from} \PYG{n+nn}{importlib} \PYG{k+kn}{import} \PYG{n}{reload}
+\PYG{k+kn}{import} \PYG{n+nn}{calzone}\PYG{n+nn}{.}\PYG{n+nn}{metrics}
+\PYG{n}{reload}\PYG{p}{(}\PYG{n}{calzone}\PYG{o}{.}\PYG{n}{metrics}\PYG{p}{)}
+\PYG{k+kn}{from} \PYG{n+nn}{calzone}\PYG{n+nn}{.}\PYG{n+nn}{metrics} \PYG{k+kn}{import} \PYG{n}{CalibrationMetrics}
+\PYG{n}{np}\PYG{o}{.}\PYG{n}{random}\PYG{o}{.}\PYG{n}{seed}\PYG{p}{(}\PYG{l+m+mi}{123}\PYG{p}{)}
+\PYG{n}{fakedata\PYGZus{}generator} \PYG{o}{=} \PYG{n}{fake\PYGZus{}binary\PYGZus{}data\PYGZus{}generator}\PYG{p}{(}\PYG{n}{alpha\PYGZus{}val}\PYG{o}{=}\PYG{l+m+mf}{0.5}\PYG{p}{,} \PYG{n}{beta\PYGZus{}val}\PYG{o}{=}\PYG{l+m+mf}{0.5}\PYG{p}{)}
+\PYG{n}{cal\PYGZus{}metrics} \PYG{o}{=} \PYG{n}{CalibrationMetrics}\PYG{p}{(}\PYG{p}{)}
+\PYG{n}{sample\PYGZus{}size} \PYG{o}{=} \PYG{l+m+mi}{1000}
+\PYG{n}{simulation\PYGZus{}size} \PYG{o}{=} \PYG{l+m+mi}{10000}
+\PYG{n}{results} \PYG{o}{=} \PYG{p}{[}\PYG{p}{]}
+\PYG{c+c1}{\PYGZsh{} generate data}
+\PYG{k}{for} \PYG{n}{i} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{simulation\PYGZus{}size}\PYG{p}{)}\PYG{p}{:}
+    \PYG{n}{X}\PYG{p}{,} \PYG{n}{y} \PYG{o}{=} \PYG{n}{fakedata\PYGZus{}generator}\PYG{o}{.}\PYG{n}{generate\PYGZus{}data}\PYG{p}{(}\PYG{n}{sample\PYGZus{}size}\PYG{p}{)}
+    \PYG{k}{if} \PYG{n}{i} \PYG{o}{==} \PYG{l+m+mi}{0}\PYG{p}{:}
+        \PYG{n}{tempresult} \PYG{o}{=} \PYG{n}{cal\PYGZus{}metrics}\PYG{o}{.}\PYG{n}{calculate\PYGZus{}metrics}\PYG{p}{(}\PYG{n}{y}\PYG{p}{,} \PYG{n}{X}\PYG{p}{,} \PYG{p}{[}\PYG{l+s+s1}{\PYGZsq{}}\PYG{l+s+s1}{HL\PYGZhy{}H}\PYG{l+s+s1}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s+s1}{\PYGZsq{}}\PYG{l+s+s1}{HL\PYGZhy{}C}\PYG{l+s+s1}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}\PYG{n}{return\PYGZus{}numpy}\PYG{o}{=}\PYG{k+kc}{False}\PYG{p}{,} \PYG{n}{df} \PYG{o}{=} \PYG{l+m+mi}{10}\PYG{p}{)}
+        \PYG{n}{keys} \PYG{o}{=} \PYG{n+nb}{list}\PYG{p}{(}\PYG{n}{tempresult}\PYG{o}{.}\PYG{n}{keys}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}
+        \PYG{n}{results}\PYG{o}{.}\PYG{n}{append}\PYG{p}{(}\PYG{n}{np}\PYG{o}{.}\PYG{n}{array}\PYG{p}{(}\PYG{n+nb}{list}\PYG{p}{(}\PYG{n}{tempresult}\PYG{o}{.}\PYG{n}{values}\PYG{p}{(}\PYG{p}{)}\PYG{p}{)}\PYG{p}{)}\PYG{p}{)}
+    \PYG{k}{else}\PYG{p}{:}
+        \PYG{n}{tempresult} \PYG{o}{=} \PYG{n}{cal\PYGZus{}metrics}\PYG{o}{.}\PYG{n}{calculate\PYGZus{}metrics}\PYG{p}{(}\PYG{n}{y}\PYG{p}{,} \PYG{n}{X}\PYG{p}{,} \PYG{p}{[}\PYG{l+s+s1}{\PYGZsq{}}\PYG{l+s+s1}{HL\PYGZhy{}H}\PYG{l+s+s1}{\PYGZsq{}}\PYG{p}{,} \PYG{l+s+s1}{\PYGZsq{}}\PYG{l+s+s1}{HL\PYGZhy{}C}\PYG{l+s+s1}{\PYGZsq{}}\PYG{p}{]}\PYG{p}{,}\PYG{n}{return\PYGZus{}numpy}\PYG{o}{=}\PYG{k+kc}{True}\PYG{p}{,} \PYG{n}{df} \PYG{o}{=} \PYG{l+m+mi}{10}\PYG{p}{)}
+        \PYG{n}{results}\PYG{o}{.}\PYG{n}{append}\PYG{p}{(}\PYG{n}{tempresult}\PYG{p}{)}
+\PYG{n}{results} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{array}\PYG{p}{(}\PYG{n}{results}\PYG{p}{)}
+
+\PYG{n}{hl\PYGZus{}h\PYGZus{}pvalue} \PYG{o}{=} \PYG{n}{results}\PYG{p}{[}\PYG{p}{:}\PYG{p}{,}\PYG{l+m+mi}{1}\PYG{p}{]}
+\PYG{n}{hl\PYGZus{}c\PYGZus{}pvalue} \PYG{o}{=} \PYG{n}{results}\PYG{p}{[}\PYG{p}{:}\PYG{p}{,}\PYG{l+m+mi}{3}\PYG{p}{]}
+\PYG{n}{size\PYGZus{}h} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{mean}\PYG{p}{(}\PYG{n}{hl\PYGZus{}h\PYGZus{}pvalue} \PYG{o}{\PYGZlt{}} \PYG{l+m+mf}{0.05}\PYG{p}{)}
+\PYG{n}{size\PYGZus{}c} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{mean}\PYG{p}{(}\PYG{n}{hl\PYGZus{}c\PYGZus{}pvalue} \PYG{o}{\PYGZlt{}} \PYG{l+m+mf}{0.05}\PYG{p}{)}
+\PYG{n+nb}{print}\PYG{p}{(}\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{The size of HL\PYGZhy{}H with df=M is :}\PYG{l+s+s2}{\PYGZdq{}}\PYG{p}{,} \PYG{n+nb}{round}\PYG{p}{(}\PYG{n}{size\PYGZus{}h}\PYG{p}{,}\PYG{l+m+mi}{3}\PYG{p}{)}\PYG{p}{)}
+\PYG{n+nb}{print}\PYG{p}{(}\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{The size of HL\PYGZhy{}C with df=M  is :}\PYG{l+s+s2}{\PYGZdq{}}\PYG{p}{,} \PYG{n+nb}{round}\PYG{p}{(}\PYG{n}{size\PYGZus{}c}\PYG{p}{,}\PYG{l+m+mi}{3}\PYG{p}{)}\PYG{p}{)}
+\end{sphinxVerbatim}
+}
+
+\end{sphinxuseclass}
+\begin{sphinxuseclass}{nboutput}
+\begin{sphinxuseclass}{nblast}
+{
+
+\kern-\sphinxverbatimsmallskipamount\kern-\baselineskip
+\kern+\FrameHeightAdjust\kern-\fboxrule
+\vspace{\nbsphinxcodecellspacing}
+
+\sphinxsetup{VerbatimColor={named}{white}}
+\begin{sphinxuseclass}{output_area}
+\begin{sphinxuseclass}{}
+
+
+\begin{sphinxVerbatim}[commandchars=\\\{\}]
+The size of HL-H with df=M is : 0.047
+The size of HL-C with df=M  is : 0.055
+\end{sphinxVerbatim}
+
+
+
+\end{sphinxuseclass}
+\end{sphinxuseclass}
+}
+
+\end{sphinxuseclass}
+\end{sphinxuseclass}
 
 \section{Reference}
 \label{\detokenize{notebooks/hl_test:Reference}}
@@ -3843,7 +3912,7 @@ \subsection{calzone.metrics module}
 \begin{fulllineitems}
 \phantomsection\label{\detokenize{calzone:calzone.metrics.hosmer_lemeshow_test}}
 \pysigstartsignatures
-\pysiglinewithargsret{\sphinxcode{\sphinxupquote{calzone.metrics.}}\sphinxbfcode{\sphinxupquote{hosmer\_lemeshow\_test}}}{\sphinxparam{\DUrole{n}{reliability}}\sphinxparamcomma \sphinxparam{\DUrole{n}{confidence}}\sphinxparamcomma \sphinxparam{\DUrole{n}{bin\_count}}\sphinxparamcomma \sphinxparam{\DUrole{n}{df}\DUrole{o}{=}\DUrole{default_value}{None}}}{}
+\pysiglinewithargsret{\sphinxcode{\sphinxupquote{calzone.metrics.}}\sphinxbfcode{\sphinxupquote{hosmer\_lemeshow\_test}}}{\sphinxparam{\DUrole{n}{reliability}}\sphinxparamcomma \sphinxparam{\DUrole{n}{confidence}}\sphinxparamcomma \sphinxparam{\DUrole{n}{bin\_count}}\sphinxparamcomma \sphinxparam{\DUrole{n}{df}\DUrole{o}{=}\DUrole{default_value}{None}}\sphinxparamcomma \sphinxparam{\DUrole{o}{**}\DUrole{n}{kwargs}}}{}
 \pysigstopsignatures
 \sphinxAtStartPar
 Compute the Hosmer\sphinxhyphen{}Lemeshow test for goodness of fit.
diff --git a/docs/build/latex/calzone.toc b/docs/build/latex/calzone.toc
index 3d6e916..b4be79c 100644
--- a/docs/build/latex/calzone.toc
+++ b/docs/build/latex/calzone.toc
@@ -21,7 +21,7 @@
 \contentsline {section}{\numberline {5.3}Cons of HL Test}{30}{section.5.3}%
 \contentsline {section}{\numberline {5.4}Calculating HL test statistics and p\sphinxhyphen {}value with calzone}{30}{section.5.4}%
 \contentsline {section}{\numberline {5.5}Size of HL test}{31}{section.5.5}%
-\contentsline {section}{\numberline {5.6}Reference}{33}{section.5.6}%
+\contentsline {section}{\numberline {5.6}Reference}{34}{section.5.6}%
 \contentsline {chapter}{\numberline {6}COX calibration analysis}{35}{chapter.6}%
 \contentsline {section}{\numberline {6.1}Theoretical Background}{35}{section.6.1}%
 \contentsline {section}{\numberline {6.2}Pros of Cox calibration analysis}{35}{section.6.2}%
diff --git a/docs/source/notebooks/hl_test.ipynb b/docs/source/notebooks/hl_test.ipynb
index ec53edd..41e8204 100644
--- a/docs/source/notebooks/hl_test.ipynb
+++ b/docs/source/notebooks/hl_test.ipynb
@@ -20,7 +20,7 @@
     "\\text{HL} = \\sum_{m=1}^{M} \\left[\\frac{(O_{1,m}-E_{1,m})^2}{E_{1,m}} + \\frac{(O_{0,m}-E_{0,m})^2}{E_{0,m}}\\right]  = \\sum_{m=1}^{M} \\frac{(O_{1,m}-E_{1,m})^2}{E_{1,m}(1-\\frac{E_{1,m}}{N_m})} \\sim \\chi^2_{M-2}\n",
     "$$\n",
     "\n",
-    "where $E_{1,m}$ is the expected number of class 1 events in the $\\text{m}^{th}$ bin, $O_{1,m}$ is the observed number of class 1 events in the $\\text{m}^{th}$ bin, $N_m$ is the total number of observations in the $\\text{m}^{th}$ bin, and $M$ is the number of bins. The HL test statistic is distributed as a chi-squared distribution with $M-2$ degrees of freedom. We can then use this test statistic to calculate the p-value for the test and determine whether we can reject the null hypothesis that the model is well-calibrated. (Notice that the degree of freedom of HL test is $M-2$ by default but some literature suggests that the degree of freedom should be $M$ instead when the samples is not used for training. We could not find a proof to this statement and we provides the option to specify the degree of freedom in the `calzone`. The default value is still $M-2$.)"
+    "where $E_{1,m}$ is the expected number of class 1 events in the $\\text{m}^{th}$ bin, $O_{1,m}$ is the observed number of class 1 events in the $\\text{m}^{th}$ bin, $N_m$ is the total number of observations in the $\\text{m}^{th}$ bin, and $M$ is the number of bins. The HL test statistic is distributed as a chi-squared distribution with $M-2$ degrees of freedom. We can then use this test statistic to calculate the p-value for the test and determine whether we can reject the null hypothesis that the model is well-calibrated. Notice that the number of degrees of freedom of the HL test is $M-2$ by default, but some literature suggests that it should be $M$ instead when the samples are not used for training. We provide the option to specify the degrees of freedom in `calzone`. The default value is still $M-2$."
    ]
   },
   {
@@ -280,7 +280,54 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We can see that both the equal-width and the equal-count method have the incorrect size."
+    "We can see that both the equal-width and the equal-count methods have the incorrect size. This is consistent with the claim that the number of degrees of freedom should be M instead of M-2, which we can show with the simulation below. We do not prove the claim here since it is beyond the scope of this documentation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The size of HL-H with df=M is : 0.047\n",
+      "The size of HL-C with df=M  is : 0.055\n"
+     ]
+    }
+   ],
+   "source": [
+    "### The size of HL Test\n",
+    "from calzone.utils import fake_binary_data_generator\n",
+    "from importlib import reload\n",
+    "import calzone.metrics\n",
+    "reload(calzone.metrics)\n",
+    "from calzone.metrics import CalibrationMetrics\n",
+    "np.random.seed(123)\n",
+    "fakedata_generator = fake_binary_data_generator(alpha_val=0.5, beta_val=0.5)\n",
+    "cal_metrics = CalibrationMetrics()\n",
+    "sample_size = 1000\n",
+    "simulation_size = 10000\n",
+    "results = []\n",
+    "# generate data\n",
+    "for i in range(simulation_size):\n",
+    "    X, y = fakedata_generator.generate_data(sample_size)\n",
+    "    if i == 0:\n",
+    "        tempresult = cal_metrics.calculate_metrics(y, X, ['HL-H', 'HL-C'],return_numpy=False, df = 10)\n",
+    "        keys = list(tempresult.keys())\n",
+    "        results.append(np.array(list(tempresult.values())))\n",
+    "    else:\n",
+    "        tempresult = cal_metrics.calculate_metrics(y, X, ['HL-H', 'HL-C'],return_numpy=True, df = 10)\n",
+    "        results.append(tempresult)\n",
+    "results = np.array(results)\n",
+    "\n",
+    "hl_h_pvalue = results[:,1]\n",
+    "hl_c_pvalue = results[:,3]\n",
+    "size_h = np.mean(hl_h_pvalue < 0.05)\n",
+    "size_c = np.mean(hl_c_pvalue < 0.05)\n",
+    "print(\"The size of HL-H with df=M is :\", round(size_h,3))\n",
+    "print(\"The size of HL-C with df=M  is :\", round(size_c,3))"
    ]
   },
   {
diff --git a/docs/source/notebooks/quickstart.ipynb b/docs/source/notebooks/quickstart.ipynb
index 1e5b953..63d7bc1 100644
--- a/docs/source/notebooks/quickstart.ipynb
+++ b/docs/source/notebooks/quickstart.ipynb
@@ -184,7 +184,8 @@
       "                      [--prevalence_adjustment] [--n_bootstrap N_BOOTSTRAP]\n",
       "                      [--bootstrap_ci BOOTSTRAP_CI]\n",
       "                      [--class_to_calculate CLASS_TO_CALCULATE]\n",
-      "                      [--num_bins NUM_BINS] [--topclass]\n",
+      "                      [--num_bins NUM_BINS]\n",
+      "                      [--hl_test_validation HL_TEST_VALIDATION] [--topclass]\n",
       "                      [--save_metrics SAVE_METRICS] [--plot]\n",
       "                      [--plot_bins PLOT_BINS] [--save_plot SAVE_PLOT]\n",
       "                      [--save_diagram_output SAVE_DIAGRAM_OUTPUT] [--verbose]\n",
@@ -211,6 +212,9 @@
       "                        Class to calculate metrics for (default: 1)\n",
       "  --num_bins NUM_BINS   Number of bins for ECE/MCE/HL calculations (default:\n",
       "                        10)\n",
+      "  --hl_test_validation HL_TEST_VALIDATION\n",
+      "                        Using nbin instead of nbin-2 as HL test DOF. Use it if\n",
+      "                        the dataset is validation set.\n",
       "  --topclass            Whether to transform the problem to top-class problem.\n",
       "  --save_metrics SAVE_METRICS\n",
       "                        Save the metrics to a csv file\n",
@@ -218,7 +222,8 @@
       "  --plot_bins PLOT_BINS\n",
       "                        Number of bins for reliability diagram\n",
       "  --save_plot SAVE_PLOT\n",
-      "                        Save the plot to a file\n",
+      "                        Save the plot to a file. Must end with valid image\n",
+      "                        formats.\n",
       "  --save_diagram_output SAVE_DIAGRAM_OUTPUT\n",
       "                        Save the reliability diagram output to a file\n",
       "  --verbose             Print verbose output\n"
@@ -290,7 +295,8 @@
     "--verbose \\\n",
     "--save_diagram_output '../../../example_data/simulated_welldata_diagram_output.csv' \n",
     "### save_diagram_output only when you want to save the reliability diagram output\n",
-    "#--prevalence_adjustment # only when you want to apply prevalence adjustment"
+    "#--prevalence_adjustment # only when you want to apply prevalence adjustment\n",
+    "#--hl_test_validation #use it only when the data is from validation set"
    ]
   },
   {
diff --git a/paper/paper.md b/paper/paper.md
index ecc30e1..1d4f3b2 100644
--- a/paper/paper.md
+++ b/paper/paper.md
@@ -89,7 +89,7 @@ plot_reliability_diagram(
 `calzone` provides functions to compute various calibration metrics. `calzone` also has a `CalibrationMetrics()` class which allows the user to compute the calibration metrics in a more convenient way. The following are the metrics that are currently supported in `calzone`: 
 
 ### Expected Calibration Error (ECE) and Maximum Calibration Error (MCE)
-Expected Calibration Error (ECE), Maximum Calibration Error (MCE) and binning-based methods [@guo_calibration;@Naeini_ece] aim to measure the average deviation between predicted probability and true probability. We provide the option to use equal-width binning or equal-frequency binning, labeled as ECE-H and ECE-C respectively. Users can also choose to compute the metrics for the class-of-interest or the top-class. In the case of class-of-interest, the program will treat it as a 1-vs-rest classification problem. It can be computed in `calzone` as follows:
+Expected Calibration Error (ECE), Maximum Calibration Error (MCE) and binning-based methods [@guo_calibration;@Naeini_ece] aim to measure the average deviation between predicted probability and true probability. We provide the option to use equal-width binning or equal-count binning, labeled as ECE-H and ECE-C respectively. Users can also choose to compute the metrics for the class-of-interest or the top-class. In the case of class-of-interest, the program will treat it as a 1-vs-rest classification problem. It can be computed in `calzone` as follows:
 
 ```python
 from calzone.metrics import calculate_ece_mce
@@ -125,6 +125,7 @@ HL_H_ts, HL_H_p, df = hosmer_lemeshow_test(
     bin_count=bin_counts
 )
 ```
+When performing the HL test on a validation set that was not used in training, we observed from simulation that the number of degrees of freedom of the HL test changes from $M-2$ to $M$. We currently do not have a proof of this, but the program allows the user to choose the degrees of freedom.
 
 
 ### Cox's calibration slope/intercept
@@ -142,7 +143,7 @@ cox_slope, cox_intercept, cox_slope_ci, cox_intercept_ci = cox_regression_analys
     fix_slope=True
 )
 ```
-The values of the slope and intercept give you a sense of the form of miscalibration. A slope greater than 1 indicates that the model is overconfident at high probabilities and underconfident at low probabilities, and vice versa. An intercept greater than 0 indicates that the model is overconfident in general, and vice versa. Notice that even if the slope is 1 and the intercept is 0, the model might not be calibrated, as Cox's calibration analysis fails to capture some types of miscalibration.
+The values of the slope and intercept give you a sense of the form of miscalibration. A slope greater than 1 indicates that the model is overconfident at high probabilities and underconfident at low probabilities, and vice versa. An intercept greater than 0 indicates that the model is overconfident in general, and vice versa. Notice that even if the slope is 1 and the intercept is 0, the model might not be calibrated, as Cox's calibration analysis fails to capture some types of miscalibration, including quadratic effects or other non-linearities.
 
 ### Integrated calibration index (ICI)