-
-
Notifications
You must be signed in to change notification settings - Fork 51
/
Copy pathmain.py
109 lines (89 loc) · 3.93 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import logging
import yaml
import mlflow
import mlflow.sklearn
from steps.ingest import Ingestion
from steps.clean import Cleaner
from steps.train import Trainer
from steps.predict import Predictor
from sklearn.metrics import classification_report
# Set up logging
logging.basicConfig(level=logging.INFO,format='%(asctime)s:%(levelname)s:%(message)s')
def main():
# Load data
ingestion = Ingestion()
train, test = ingestion.load_data()
logging.info("Data ingestion completed successfully")
# Clean data
cleaner = Cleaner()
train_data = cleaner.clean_data(train)
test_data = cleaner.clean_data(test)
logging.info("Data cleaning completed successfully")
# Prepare and train model
trainer = Trainer()
X_train, y_train = trainer.feature_target_separator(train_data)
trainer.train_model(X_train, y_train)
trainer.save_model()
logging.info("Model training completed successfully")
# Evaluate model
predictor = Predictor()
X_test, y_test = predictor.feature_target_separator(test_data)
accuracy, class_report, roc_auc_score = predictor.evaluate_model(X_test, y_test)
logging.info("Model evaluation completed successfully")
# Print evaluation results
print("\n============= Model Evaluation Results ==============")
print(f"Model: {trainer.model_name}")
print(f"Accuracy Score: {accuracy:.4f}, ROC AUC Score: {roc_auc_score:.4f}")
print(f"\n{class_report}")
print("=====================================================\n")
def train_with_mlflow():
with open('config.yml', 'r') as file:
config = yaml.safe_load(file)
mlflow.set_experiment("Model Training Experiment")
with mlflow.start_run() as run:
# Load data
ingestion = Ingestion()
train, test = ingestion.load_data()
logging.info("Data ingestion completed successfully")
# Clean data
cleaner = Cleaner()
train_data = cleaner.clean_data(train)
test_data = cleaner.clean_data(test)
logging.info("Data cleaning completed successfully")
# Prepare and train model
trainer = Trainer()
X_train, y_train = trainer.feature_target_separator(train_data)
trainer.train_model(X_train, y_train)
trainer.save_model()
logging.info("Model training completed successfully")
# Evaluate model
predictor = Predictor()
X_test, y_test = predictor.feature_target_separator(test_data)
accuracy, class_report, roc_auc_score = predictor.evaluate_model(X_test, y_test)
report = classification_report(y_test, trainer.pipeline.predict(X_test), output_dict=True)
logging.info("Model evaluation completed successfully")
# Tags
mlflow.set_tag('Model developer', 'prsdm')
mlflow.set_tag('preprocessing', 'OneHotEncoder, Standard Scaler, and MinMax Scaler')
# Log metrics
model_params = config['model']['params']
mlflow.log_params(model_params)
mlflow.log_metric("accuracy", accuracy)
mlflow.log_metric("roc", roc_auc_score)
mlflow.log_metric('precision', report['weighted avg']['precision'])
mlflow.log_metric('recall', report['weighted avg']['recall'])
mlflow.sklearn.log_model(trainer.pipeline, "model")
# Register the model
model_name = "insurance_model"
model_uri = f"runs:/{run.info.run_id}/model"
mlflow.register_model(model_uri, model_name)
logging.info("MLflow tracking completed successfully")
# Print evaluation results
print("\n============= Model Evaluation Results ==============")
print(f"Model: {trainer.model_name}")
print(f"Accuracy Score: {accuracy:.4f}, ROC AUC Score: {roc_auc_score:.4f}")
print(f"\n{class_report}")
print("=====================================================\n")
if __name__ == "__main__":
# main()
train_with_mlflow()