Skip to content

Commit

Permalink
API working
Browse files Browse the repository at this point in the history
MarkoBrie committed Feb 15, 2024
1 parent c4c4245 commit bf5af78
Showing 11 changed files with 100,794 additions and 984 deletions.
1,999 changes: 1,934 additions & 65 deletions .ipynb_checkpoints/2_Model_selection-checkpoint.ipynb

Large diffs are not rendered by default.

303 changes: 144 additions & 159 deletions 1_EDA.ipynb

Large diffs are not rendered by default.

1,034 changes: 986 additions & 48 deletions 2_Model_selection.ipynb

Large diffs are not rendered by default.

54 changes: 25 additions & 29 deletions 3_STREAMlit_dashboard.py
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@

def request_prediction(model_uri, data):
headers = {"Content-Type": "application/json"}
st.write(data)

data_json = data

st.write(data_json)
@@ -27,48 +27,44 @@ def main():
'Quelle API souhaitez vous utiliser',
['MLflow', 'Option 2', 'Option 3'])

st.title('Median House Price Prediction')
st.title('Prédiction du Credit Score avec ID')

selected_radio = st.radio('Select an option', ['Option 1', 'Option 2', 'Option 3'])
# load file into pandas
ids_test = pd.read_csv('../2_INPUT_DATA/3_SPLIT/ids_test.csv')
X_train = pd.read_csv('../2_INPUT_DATA/3_SPLIT/X_train.csv')
X_train["ID"] = ids_test
X_train.set_index("ID", inplace=True)
st.write(X_train.shape)

# List of IDs
id_list = ['100002', '100003', '100004', '100005', '100006', '100007', '100008', '100009']
id_list = ids_test.iloc[:,0].values.tolist()

selected_id = st.selectbox('Search and select an ID', options=id_list, index=0, format_func=lambda x: x if x else 'Search...')
#get selected_id index in ids_test and use the index to get the data

revenu_med = st.number_input('Revenu médian dans le secteur (en 10K de dollars)',
min_value=0., value=3.87, step=1.)
st.write(selected_id)
st.write(X_train.loc[selected_id].shape)
st.write(X_train.loc[selected_id].values.tolist())
data = { "inputs":[X_train.loc[selected_id].values.tolist()]}
st.write(data)

predict_btn = st.button('Prédire')
if predict_btn:
data = pd.DataFrame([[revenu_med, age_med, nb_piece_med, nb_chambre_moy,
taille_pop, occupation_moy, latitude, longitude]])#.to_json(orient='records')

data = {"dataframe_records": [[revenu_med, age_med, nb_piece_med, nb_chambre_moy,
taille_pop, occupation_moy, latitude, longitude]]}

st.write(data)
st.write("after")
data = { "inputs":[[0, 0, 1, 1, 63000.0, 310500.0, 15232.5, 310500.0, 0.026392, 16263, -214.0, -8930.0, -573, 0.0, 1, 1, 0, 1, 1, 0, 2.0, 2, 2, 11, 0, 0, 0, 0, 1, 1, 0.0, 0.0765011930557638, 0.0005272652387098, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, True, False, False, False, False, False, False, False, True, False, False, False, False, False, False, True, False, False, False, False, True, False, False, False, True, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False]]}



pred = None

if api_choice == 'MLflow':
st.write(MLFLOW_URI)
#st.write(data)
pred = request_prediction(MLFLOW_URI, data)#[0] * 100000
st.write(pred)
st.write(pred["prediction"])
elif api_choice == 'Cortex':
pred = request_prediction(CORTEX_URI, data)[0] * 100000
elif api_choice == 'Ray Serve':
pred = request_prediction(RAY_SERVE_URI, data)[0] * 100000
st.write(
'Le prix médian d\'une habitation est de {:.2f}'.format(pred["prediction"]))
#'Le prix médian d\'une habitation est de {:.2f}'.format(pred["prediction"][0]))


st.write(MLFLOW_URI)
#st.write(data)
pred = request_prediction(MLFLOW_URI, data)#[0] * 100000
st.write(pred)
st.write(pred["prediction"])

st.write(
'Le score crédit est de {:.2f}'.format(pred["prediction"]))


if __name__ == '__main__':
main()
514 changes: 17 additions & 497 deletions 4_Data_Drift_with_EvidentlyAI.ipynb

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions EvidentlyAI_DataDrift.html

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
@@ -11,6 +11,7 @@ click = "*"
streamlit = "*"
mlflow = "2.9.2"
lightgbm = "4.1.0"
pydantic = "*"

[dev-packages]

333 changes: 167 additions & 166 deletions Pipfile.lock

Large diffs are not rendered by default.

38 changes: 24 additions & 14 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import uvicorn
from fastapi import FastAPI
import numpy as np
#import pickle # pipfile does not lock
import mlflow
import lightgbm
import os
#import streamlit as st
#import cloudpickle
#import pickle
from typing import List
from pydantic import BaseModel # for data validation

# load environment variables
port = os.environ["PORT"]
@@ -17,26 +17,36 @@
Visit this URL at port 8501 for the streamlit interface.''',
version="0.1.0",)

# Path to the .pkl file containing the serialized model
#model_file_path = "model.pkl"

# Load the model from the .pkl file
#with open(model_file_path, 'rb') as f:
# model = pickle.load(f)
# Pydantic model for the input data: FastAPI uses it to validate and
# deserialize the JSON request body of the /predict endpoint.
class DataPoint(BaseModel):
    # Flat feature vector for a single sample, sent as a JSON array of
    # numbers. NOTE(review): a commented-out check in predict_credit_score
    # suggests exactly 239 features are expected — confirm against the
    # trained LightGBM model before enforcing.
    data_point: List[float]

# 3. Expose the prediction functionality, make a prediction from the passed
# JSON data and return the predicted flower species with the confidence
# 3. Expose the prediction functionality: make a prediction from the
# posted JSON data and return the model's confidence for the sample.
@app.post('/predict')
def predict_credit_score(data: DataPoint):
    """Score a single credit applicant.

    Parameters
    ----------
    data : DataPoint
        Validated request body; ``data.data_point`` is the flat feature
        vector for one sample.

    Returns
    -------
    dict
        ``{'prediction': <max class probability>, 'probability': 0.8}``.
        NOTE(review): 'probability' is a hard-coded placeholder value —
        confirm whether any caller relies on it before removing.
    """
    # Debug logging of the incoming payload.
    print("predict_credit_score function")
    print([data.data_point])

    # Load the serialized LightGBM model from the local "LightGBM" MLflow
    # model directory.
    # NOTE(review): loading the model on every request is expensive —
    # consider loading it once at startup and reusing the object.
    sklearn_pyfunc = mlflow.lightgbm.load_model(model_uri="LightGBM")

    # predict_proba returns per-class probabilities for the (single-row)
    # batch; .max() keeps the largest one, i.e. the confidence of the
    # predicted class — not specifically P(default).
    prediction = sklearn_pyfunc.predict_proba([data.data_point]).max()
    print(prediction)

    return {
        'prediction': prediction,
        'probability': 0.8
    }

@app.get("/")
48,745 changes: 48,745 additions & 0 deletions random_forest_baseline.csv

Large diffs are not rendered by default.

48,745 changes: 48,745 additions & 0 deletions random_forest_baseline_engineered.csv

Large diffs are not rendered by default.

0 comments on commit bf5af78

Please sign in to comment.