Skip to content

Commit

Permalink
Change return text for Fastapi
Browse files Browse the repository at this point in the history
  • Loading branch information
MarkoBrie committed Mar 4, 2024
1 parent b5a708b commit 0e4e34c
Show file tree
Hide file tree
Showing 10 changed files with 308,415 additions and 3,119 deletions.
158 changes: 104 additions & 54 deletions .ipynb_checkpoints/2_Model_selection-checkpoint.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
"id": "d2c08eb9",
"metadata": {},
"source": [
"**Supervised:** The labels are included in the training data and the goal is to train a model to learn to predict the labels from the features \n",
"**Classification:** The label is a binary variable, 0 (will repay loan on time), 1 (will have difficulty repaying loan)\n",
"\n",
"- Evaluation des performances des modèles d’apprentissage supervisé selon différents critères (scores, temps d'entraînement, etc.) en adaptant les paramètres afin de choisir le modèle le plus performant pour la problématique métier.\n",
"- Calcul d'un score métier : FP + 10 * FN\n",
"\n",
Expand Down Expand Up @@ -45,7 +48,7 @@
},
{
"cell_type": "code",
"execution_count": 139,
"execution_count": 258,
"id": "1c8b0045",
"metadata": {},
"outputs": [],
Expand All @@ -58,6 +61,7 @@
"\n",
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
"from sklearn.model_selection import RandomizedSearchCV\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import precision_recall_curve\n",
"from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score\n",
Expand Down Expand Up @@ -128,7 +132,7 @@
{
"cell_type": "code",
"execution_count": 255,
"id": "81ed0da2",
"id": "2d0bbc23",
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -2318,61 +2322,64 @@
"id": "9e08b8cf",
"metadata": {},
"source": [
"<a name='4'></a>\n",
"<a name='5'></a>\n",
"# 5 Evaluation and selection of best model"
]
},
{
"cell_type": "code",
"execution_count": 254,
"execution_count": 264,
"id": "73845c9f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" time_in_s FP_10_FN FP TP Accuracy \\\n",
"Run Name \n",
"LGBM_Shap002 508.117909 35429.0 15139.0 2855.0 0.720859 \n",
"RFC_newFEATURE_001 13614.842443 36741.0 19881.0 3198.0 0.649334 \n",
"XGB_Shap002 401.376422 32718.0 12948.0 2907.0 0.757329 \n",
"RFC_newFEATURE_002 5635.813629 35370.0 17020.0 3049.0 0.693430 \n",
"RFC_newFEATURE 12256.283958 36534.0 19814.0 3212.0 0.650651 \n",
"XGB 26824.582238 32675.0 12635.0 2880.0 0.761979 \n",
"RFC 13921.119884 37986.0 18046.0 2890.0 0.674162 \n",
"RFC_smote 1510.164263 35067.0 10407.0 2418.0 0.790693 \n",
"LightGBM_smote 302.068259 35415.0 15435.0 2886.0 0.716550 \n",
"LightGBM 499.470917 35415.0 15435.0 2886.0 0.716550 \n",
"XGB_smote 194.733210 33112.0 18292.0 3402.0 0.678487 \n",
" time_in_s FP_10_FN FP TP Accuracy \\\n",
"Run Name \n",
"LGBM_Shap002_scaled 485.448492 34979.0 15079.0 2894.0 0.722469 \n",
"LGBM_Shap002 508.117909 35429.0 15139.0 2855.0 0.720859 \n",
"RFC_newFEATURE_001 13614.842443 36741.0 19881.0 3198.0 0.649334 \n",
"XGB_Shap002 401.376422 32718.0 12948.0 2907.0 0.757329 \n",
"RFC_newFEATURE_002 5635.813629 35370.0 17020.0 3049.0 0.693430 \n",
"RFC_newFEATURE 12256.283958 36534.0 19814.0 3212.0 0.650651 \n",
"XGB 26824.582238 32675.0 12635.0 2880.0 0.761979 \n",
"RFC 13921.119884 37986.0 18046.0 2890.0 0.674162 \n",
"RFC_smote 1510.164263 35067.0 10407.0 2418.0 0.790693 \n",
"LightGBM_smote 302.068259 35415.0 15435.0 2886.0 0.716550 \n",
"LightGBM 499.470917 35415.0 15435.0 2886.0 0.716550 \n",
"XGB_smote 194.733210 33112.0 18292.0 3402.0 0.678487 \n",
"\n",
" Recall threshold ROC_AUC FN Precision \\\n",
"Run Name \n",
"LGBM_Shap002 0.584562 0.2 0.658589 2029.0 0.158664 \n",
"RFC_newFEATURE_001 0.654791 0.1 0.651827 1686.0 0.138568 \n",
"XGB_Shap002 0.595209 0.1 0.683261 1977.0 0.183349 \n",
"RFC_newFEATURE_002 0.624283 0.1 0.661839 1835.0 0.151926 \n",
"RFC_newFEATURE 0.657658 0.1 0.653852 1672.0 0.139494 \n",
"XGB 0.589681 0.1 0.683261 2004.0 0.185627 \n",
"RFC 0.591728 0.1 0.636501 1994.0 0.138040 \n",
"RFC_smote 0.495086 0.4 0.655639 2466.0 0.188538 \n",
"LightGBM_smote 0.590909 0.2 0.659149 1998.0 0.157524 \n",
"LightGBM 0.590909 0.2 0.659149 1998.0 0.157524 \n",
"XGB_smote 0.696560 0.3 0.686744 1482.0 0.156818 \n",
" Recall threshold ROC_AUC FN Precision \\\n",
"Run Name \n",
"LGBM_Shap002_scaled 0.592547 0.2 0.663112 1990.0 0.161019 \n",
"LGBM_Shap002 0.584562 0.2 0.658589 2029.0 0.158664 \n",
"RFC_newFEATURE_001 0.654791 0.1 0.651827 1686.0 0.138568 \n",
"XGB_Shap002 0.595209 0.1 0.683261 1977.0 0.183349 \n",
"RFC_newFEATURE_002 0.624283 0.1 0.661839 1835.0 0.151926 \n",
"RFC_newFEATURE 0.657658 0.1 0.653852 1672.0 0.139494 \n",
"XGB 0.589681 0.1 0.683261 2004.0 0.185627 \n",
"RFC 0.591728 0.1 0.636501 1994.0 0.138040 \n",
"RFC_smote 0.495086 0.4 0.655639 2466.0 0.188538 \n",
"LightGBM_smote 0.590909 0.2 0.659149 1998.0 0.157524 \n",
"LightGBM 0.590909 0.2 0.659149 1998.0 0.157524 \n",
"XGB_smote 0.696560 0.3 0.686744 1482.0 0.156818 \n",
"\n",
" F1 TN \n",
"Run Name \n",
"LGBM_Shap002 0.249585 41480.0 \n",
"RFC_newFEATURE_001 0.228731 36738.0 \n",
"XGB_Shap002 0.280341 43671.0 \n",
"RFC_newFEATURE_002 0.244379 39599.0 \n",
"RFC_newFEATURE 0.230168 36805.0 \n",
"XGB 0.282367 43984.0 \n",
"RFC 0.223857 38573.0 \n",
"RFC_smote 0.273081 46212.0 \n",
"LightGBM_smote 0.248739 41184.0 \n",
"LightGBM 0.248739 41184.0 \n",
"XGB_smote 0.256001 38327.0 \n"
" F1 TN \n",
"Run Name \n",
"LGBM_Shap002_scaled 0.253227 41540.0 \n",
"LGBM_Shap002 0.249585 41480.0 \n",
"RFC_newFEATURE_001 0.228731 36738.0 \n",
"XGB_Shap002 0.280341 43671.0 \n",
"RFC_newFEATURE_002 0.244379 39599.0 \n",
"RFC_newFEATURE 0.230168 36805.0 \n",
"XGB 0.282367 43984.0 \n",
"RFC 0.223857 38573.0 \n",
"RFC_smote 0.273081 46212.0 \n",
"LightGBM_smote 0.248739 41184.0 \n",
"LightGBM 0.248739 41184.0 \n",
"XGB_smote 0.256001 38327.0 \n"
]
}
],
Expand Down Expand Up @@ -2533,7 +2540,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "cebf98bc",
"id": "f5ffa864",
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -3271,7 +3278,7 @@
},
{
"cell_type": "markdown",
"id": "0b3389b1",
"id": "d555a1ce",
"metadata": {},
"source": [
"<a name='7'></a>\n",
Expand All @@ -3280,7 +3287,7 @@
},
{
"cell_type": "markdown",
"id": "df6a7489",
"id": "a5654c2e",
"metadata": {},
"source": [
"## Filter out features that are not useful"
Expand Down Expand Up @@ -3379,7 +3386,7 @@
{
"cell_type": "code",
"execution_count": 237,
"id": "b09617a1",
"id": "7faa22f0",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -3603,7 +3610,7 @@
},
{
"cell_type": "markdown",
"id": "b7b2f0ca",
"id": "6f9d46be",
"metadata": {},
"source": [
"### First attempt to improve feature selection and model training"
Expand Down Expand Up @@ -4648,7 +4655,7 @@
},
{
"cell_type": "markdown",
"id": "ce809a7d",
"id": "4e795d20",
"metadata": {},
"source": [
"### Second attempt to improve feature selection and model improvement"
Expand All @@ -4657,7 +4664,7 @@
{
"cell_type": "code",
"execution_count": 253,
"id": "c1d66850",
"id": "54a0b6a6",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -4847,7 +4854,7 @@
{
"cell_type": "code",
"execution_count": 228,
"id": "db117fe4",
"id": "c8e8fb39",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -4963,7 +4970,7 @@
{
"cell_type": "code",
"execution_count": 231,
"id": "339217b2",
"id": "3335d4b5",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -5097,7 +5104,7 @@
{
"cell_type": "code",
"execution_count": 251,
"id": "36e5f24d",
"id": "3be94358",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -5327,7 +5334,7 @@
{
"cell_type": "code",
"execution_count": 245,
"id": "0ab66442",
"id": "786f91bb",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -5569,6 +5576,49 @@
"new_X_train_002.head()"
]
},
{
"cell_type": "markdown",
"id": "884ca746",
"metadata": {},
"source": [
"## Run with SHAP-filtered and scaled data to assess the impact on metrics"
]
},
{
"cell_type": "code",
"execution_count": 259,
"id": "ed32852c",
"metadata": {},
"outputs": [],
"source": [
"X_train_002_scale, X_test_002_scale = scale_data(new_X_train_002, new_X_test_002)"
]
},
{
"cell_type": "code",
"execution_count": 263,
"id": "ea1a18d6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Artifact PATH LGBM_Shap002_scaled_artifactPATH\n",
"{'TN': 41540, 'FP': 15079, 'FN': 1990, 'TP': 2894, 'FP_10_FN': 34979, 'Accuracy': 0.7224688226590573, 'F1': 0.2532265826661417, 'Precision': 0.16101930673788462, 'Recall': 0.5925470925470926, 'ROC_AUC': 0.6631115335216432, 'threshold': 0.2, 'time_in_s': 485.4484920501709}\n",
"{'subsample': 0.8, 'reg_lambda': 0.1, 'reg_alpha': 0.1, 'objective': 'binary', 'num_leaves': 31, 'n_estimators': 10000, 'metric': 'binary_logloss', 'learning_rate': 0.05, 'class_weight': 'balanced', 'boosting_type': 'gbdt'}\n",
"Active run_id: d9587cc928794dddac2b5407fd4412c6\n"
]
}
],
"source": [
"# Train LightGBM on the scaled, SHAP-filtered features, evaluate it on the scaled test set,\n",
"# then log the run to MLflow. Training and evaluation must run in this cell: run_MLflow\n",
"# consumes the model, params and metrics they produce, so leaving them commented out makes\n",
"# the cell fail with NameError on a fresh kernel (Restart Kernel -> Run All).\n",
"run_name = \"LGBM_Shap002_scaled\"\n",
"LGBM_model_002_scale, LGBM_002_scale_params, time_LGBM_002 = train_LightGBM_model(X_train_002_scale, Y_train)\n",
"LGBM_002_scale_metrics, best_metrics_LGBM_scale = generate_model_report(LGBM_model_002_scale, run_name, X_test_002_scale, Y_test, time_LGBM_002)\n",
"run_MLflow(experiment_name, run_name, LGBM_002_scale_metrics,\n",
"           LGBM_002_scale_params, LGBM_model_002_scale, X_train_002_scale)"
]
},
{
"cell_type": "markdown",
"id": "8233f52e",
Expand Down
Loading

0 comments on commit 0e4e34c

Please sign in to comment.