Skip to content

Commit

Permalink
Adding reference
Browse files Browse the repository at this point in the history
  • Loading branch information
jasonfan1997 committed Oct 29, 2024
1 parent 0115ec4 commit b919ebf
Show file tree
Hide file tree
Showing 17 changed files with 279 additions and 173 deletions.
Binary file modified docs/build/doctrees/environment.pickle
Binary file not shown.
50 changes: 34 additions & 16 deletions docs/build/doctrees/nbsphinx/notebooks/validation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,20 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Difference for uniform/histogram binning:\n",
"Difference for equal-width binning:\n",
"Reliability difference: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
"Confidence difference: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
"\n",
"Difference for quantile/equal frequency binning:\n",
"Reliability difference: [1.e-05 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00\n",
" 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00]\n",
"Confidence difference: [1.e-05 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00\n",
" 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00]\n"
"Difference for equal-count binning:\n",
"Reliability difference: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
"Confidence difference: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n"
]
}
],
Expand All @@ -51,17 +49,21 @@
"### loading the data\n",
"wellcal_dataloader = data_loader(data_path=\"../../../example_data/simulated_welldata.csv\")\n",
"\n",
"### scikit-learn implementation\n",
"scikit_reliability_H,scikit_confidence_H = calibration_curve(wellcal_dataloader.labels,wellcal_dataloader.probs[:,1],n_bins=15,strategy='uniform',pos_label=1)\n",
"scikit_reliability_C,scikit_confidence_C = calibration_curve(wellcal_dataloader.labels,wellcal_dataloader.probs[:,1],n_bins=15,strategy='quantile',pos_label=1)\n",
"\n",
"### calzone implementation\n",
"calzone_reliability_H,calzone_confindence_H,bin_edge_H,bin_count_H = reliability_diagram(wellcal_dataloader.labels,wellcal_dataloader.probs,num_bins=15, class_to_plot=1, is_equal_freq=False)\n",
"calzone_reliability_C,calzone_confindence_C,bin_edge_C,bin_count_C = reliability_diagram(wellcal_dataloader.labels,wellcal_dataloader.probs,num_bins=15, class_to_plot=1, is_equal_freq=True)\n",
"\n",
"print(\"Difference for uniform/histogram binning:\")\n",
"print(\"Reliability difference:\", np.round(np.abs(scikit_reliability_H - calzone_reliability_H), 5))\n",
"print(\"Confidence difference:\", np.round(np.abs(scikit_confidence_H - calzone_confindence_H), 5))\n",
"print(\"\\nDifference for quantile/equal frequency binning:\")\n",
"print(\"Reliability difference:\", np.round(np.abs(scikit_reliability_C - calzone_reliability_C), 5))\n",
"print(\"Confidence difference:\", np.round(np.abs(scikit_confidence_C - calzone_confindence_C), 5))"
"### showing the difference between the two implementations\n",
"print(\"Difference for equal-width binning:\")\n",
"print(\"Reliability difference:\", np.round(np.abs(scikit_reliability_H - calzone_reliability_H), 4))\n",
"print(\"Confidence difference:\", np.round(np.abs(scikit_confidence_H - calzone_confindence_H), 4))\n",
"print(\"\\nDifference for equal-count binning:\")\n",
"print(\"Reliability difference:\", np.round(np.abs(scikit_reliability_C - calzone_reliability_C), 4))\n",
"print(\"Confidence difference:\", np.round(np.abs(scikit_confidence_C - calzone_confindence_C), 4))"
]
},
{
Expand All @@ -82,7 +84,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 3,
"metadata": {},
"outputs": [
{
Expand All @@ -100,13 +102,14 @@
"calzone_reliability_topclass_H,calzone_confindence_topclass_H,bin_edge_topclass_H,bin_count_topclass_H = reliability_diagram(wellcal_dataloader.labels,wellcal_dataloader.probs,num_bins=15, class_to_plot=None, is_equal_freq=False)\n",
"calzone_reliability_topclass_C,calzone_confindence_topclass_C,bin_edge_topclass_C,bin_count_topclass_C = reliability_diagram(wellcal_dataloader.labels,wellcal_dataloader.probs,num_bins=15, class_to_plot=None, is_equal_freq=True)\n",
"\n",
"### compare MAPIE and calzone equal-width binning\n",
"print(\"MAPIE topclass ECE-H:\",top_label_ece(wellcal_dataloader.labels,wellcal_dataloader.probs,num_bins = 15,split_strategy='uniform'))\n",
"print(\"calzone topclass ECE-H:\",calculate_ece_mce(calzone_reliability_topclass_H,calzone_confindence_topclass_H,bin_count_topclass_H)[0])\n"
]
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -119,6 +122,7 @@
}
],
"source": [
"### compare MAPIE and calzone equal-count binning\n",
"print(\"MAPIE topclass ECE-C:\",top_label_ece(wellcal_dataloader.labels,wellcal_dataloader.probs,num_bins = 15,split_strategy='quantile'))\n",
"print(\"calzone topclass ECE-C:\",calculate_ece_mce(calzone_reliability_topclass_C,calzone_confindence_topclass_C,bin_count_topclass_C)[0])\n"
]
Expand All @@ -132,7 +136,7 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 5,
"metadata": {},
"outputs": [
{
Expand All @@ -145,6 +149,7 @@
}
],
"source": [
"### compare the Z statistics\n",
"print(\"MAPIE Z statistic\", spiegelhalter_statistic(wellcal_dataloader.labels,wellcal_dataloader.probs[:,1]))\n",
"print(\"calzone Z statistic\", spiegelhalter_z_test(wellcal_dataloader.labels,wellcal_dataloader.probs)[0])"
]
Expand Down Expand Up @@ -233,6 +238,19 @@
"source": [
"We see that the test statistics are the same. The R package does not let the user specify the degrees of freedom, so the p-value differs, as expected."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## reference\n",
"\n",
"Taquet, V., Blot, V., Morzadec, T., Lacombe, L., & Brunel, N. (2022). MAPIE: an open-source library for distribution-free uncertainty quantification. arXiv preprint arXiv:2207.12274.\n",
"\n",
"Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., Thirion, B., Grisel, O., … Duchesnay, É. (2011). Scikit-learn: Machine learning in Python. Journal of Machine Learning Research, 12(Oct), 2825–2830.\n",
"\n",
"Lele, S. R., Keim, J. L., & Solymos, P. (2017). Resource selection (probability) functions for use-availability data. Package ‘ResourceSelection’, Version 0.3-2."
]
}
],
"metadata": {
Expand Down
Binary file modified docs/build/doctrees/notebooks/validation.doctree
Binary file not shown.
50 changes: 34 additions & 16 deletions docs/build/html/_sources/notebooks/validation.ipynb.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,20 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Difference for uniform/histogram binning:\n",
"Difference for equal-width binning:\n",
"Reliability difference: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
"Confidence difference: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
"\n",
"Difference for quantile/equal frequency binning:\n",
"Reliability difference: [1.e-05 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00\n",
" 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00]\n",
"Confidence difference: [1.e-05 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00\n",
" 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00]\n"
"Difference for equal-count binning:\n",
"Reliability difference: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
"Confidence difference: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n"
]
}
],
Expand All @@ -51,17 +49,21 @@
"### loading the data\n",
"wellcal_dataloader = data_loader(data_path=\"../../../example_data/simulated_welldata.csv\")\n",
"\n",
"### scikit-learn implementation\n",
"scikit_reliability_H,scikit_confidence_H = calibration_curve(wellcal_dataloader.labels,wellcal_dataloader.probs[:,1],n_bins=15,strategy='uniform',pos_label=1)\n",
"scikit_reliability_C,scikit_confidence_C = calibration_curve(wellcal_dataloader.labels,wellcal_dataloader.probs[:,1],n_bins=15,strategy='quantile',pos_label=1)\n",
"\n",
"### calzone implementation\n",
"calzone_reliability_H,calzone_confindence_H,bin_edge_H,bin_count_H = reliability_diagram(wellcal_dataloader.labels,wellcal_dataloader.probs,num_bins=15, class_to_plot=1, is_equal_freq=False)\n",
"calzone_reliability_C,calzone_confindence_C,bin_edge_C,bin_count_C = reliability_diagram(wellcal_dataloader.labels,wellcal_dataloader.probs,num_bins=15, class_to_plot=1, is_equal_freq=True)\n",
"\n",
"print(\"Difference for uniform/histogram binning:\")\n",
"print(\"Reliability difference:\", np.round(np.abs(scikit_reliability_H - calzone_reliability_H), 5))\n",
"print(\"Confidence difference:\", np.round(np.abs(scikit_confidence_H - calzone_confindence_H), 5))\n",
"print(\"\\nDifference for quantile/equal frequency binning:\")\n",
"print(\"Reliability difference:\", np.round(np.abs(scikit_reliability_C - calzone_reliability_C), 5))\n",
"print(\"Confidence difference:\", np.round(np.abs(scikit_confidence_C - calzone_confindence_C), 5))"
"### showing the difference between the two implementations\n",
"print(\"Difference for equal-width binning:\")\n",
"print(\"Reliability difference:\", np.round(np.abs(scikit_reliability_H - calzone_reliability_H), 4))\n",
"print(\"Confidence difference:\", np.round(np.abs(scikit_confidence_H - calzone_confindence_H), 4))\n",
"print(\"\\nDifference for equal-count binning:\")\n",
"print(\"Reliability difference:\", np.round(np.abs(scikit_reliability_C - calzone_reliability_C), 4))\n",
"print(\"Confidence difference:\", np.round(np.abs(scikit_confidence_C - calzone_confindence_C), 4))"
]
},
{
Expand All @@ -82,7 +84,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 3,
"metadata": {},
"outputs": [
{
Expand All @@ -100,13 +102,14 @@
"calzone_reliability_topclass_H,calzone_confindence_topclass_H,bin_edge_topclass_H,bin_count_topclass_H = reliability_diagram(wellcal_dataloader.labels,wellcal_dataloader.probs,num_bins=15, class_to_plot=None, is_equal_freq=False)\n",
"calzone_reliability_topclass_C,calzone_confindence_topclass_C,bin_edge_topclass_C,bin_count_topclass_C = reliability_diagram(wellcal_dataloader.labels,wellcal_dataloader.probs,num_bins=15, class_to_plot=None, is_equal_freq=True)\n",
"\n",
"### compare MAPIE and calzone equal-width binning\n",
"print(\"MAPIE topclass ECE-H:\",top_label_ece(wellcal_dataloader.labels,wellcal_dataloader.probs,num_bins = 15,split_strategy='uniform'))\n",
"print(\"calzone topclass ECE-H:\",calculate_ece_mce(calzone_reliability_topclass_H,calzone_confindence_topclass_H,bin_count_topclass_H)[0])\n"
]
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -119,6 +122,7 @@
}
],
"source": [
"### compare MAPIE and calzone equal-count binning\n",
"print(\"MAPIE topclass ECE-C:\",top_label_ece(wellcal_dataloader.labels,wellcal_dataloader.probs,num_bins = 15,split_strategy='quantile'))\n",
"print(\"calzone topclass ECE-C:\",calculate_ece_mce(calzone_reliability_topclass_C,calzone_confindence_topclass_C,bin_count_topclass_C)[0])\n"
]
Expand All @@ -132,7 +136,7 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 5,
"metadata": {},
"outputs": [
{
Expand All @@ -145,6 +149,7 @@
}
],
"source": [
"### compare the Z statistics\n",
"print(\"MAPIE Z statistic\", spiegelhalter_statistic(wellcal_dataloader.labels,wellcal_dataloader.probs[:,1]))\n",
"print(\"calzone Z statistic\", spiegelhalter_z_test(wellcal_dataloader.labels,wellcal_dataloader.probs)[0])"
]
Expand Down Expand Up @@ -233,6 +238,19 @@
"source": [
"We see that the test statistics are the same. The R package does not let the user specify the degrees of freedom, so the p-value differs, as expected."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## reference\n",
"\n",
"Taquet, V., Blot, V., Morzadec, T., Lacombe, L., & Brunel, N. (2022). MAPIE: an open-source library for distribution-free uncertainty quantification. arXiv preprint arXiv:2207.12274.\n",
"\n",
"Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., Thirion, B., Grisel, O., … Duchesnay, É. (2011). Scikit-learn: Machine learning in Python. Journal of Machine Learning Research, 12(Oct), 2825–2830.\n",
"\n",
"Lele, S. R., Keim, J. L., & Solymos, P. (2017). Resource selection (probability) functions for use-availability data. Package ‘ResourceSelection’, Version 0.3-2."
]
}
],
"metadata": {
Expand Down
1 change: 1 addition & 0 deletions docs/build/html/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ <h1>Welcome to the documentation for calzone<a class="headerlink" href="#welcome
<li class="toctree-l2"><a class="reference internal" href="notebooks/validation.html#Reliability-diagram">Reliability diagram</a></li>
<li class="toctree-l2"><a class="reference internal" href="notebooks/validation.html#Expected-calibration-error-and-Z-test">Expected calibration error and Z test</a></li>
<li class="toctree-l2"><a class="reference internal" href="notebooks/validation.html#HL-test">HL test</a></li>
<li class="toctree-l2"><a class="reference internal" href="notebooks/validation.html#reference">reference</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="modules.html">calzone</a><ul>
Expand Down
Loading

0 comments on commit b919ebf

Please sign in to comment.