Skip to content

Commit

Permalink
Adding reports
Browse files Browse the repository at this point in the history
  • Loading branch information
James Bristow committed Mar 1, 2024
1 parent 53a4f3d commit a50e569
Show file tree
Hide file tree
Showing 13 changed files with 29,685 additions and 0 deletions.
635 changes: 635 additions & 0 deletions deepchecks (1).html

Large diffs are not rendered by default.

635 changes: 635 additions & 0 deletions deepchecks (2).html

Large diffs are not rendered by default.

635 changes: 635 additions & 0 deletions deepchecks.html

Large diffs are not rendered by default.

672 changes: 672 additions & 0 deletions evidently_metric_report.html

Large diffs are not rendered by default.

672 changes: 672 additions & 0 deletions evidently_test_report.html

Large diffs are not rendered by default.

91 changes: 91 additions & 0 deletions get_reports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import os
# Set the opt-out flag before whylogs is imported further down.
# NOTE(review): presumably whylogs reads this variable at import time to
# disable its usage analytics -- confirm against the whylogs documentation.
os.environ['WHYLOGS_NO_ANALYTICS']='True'

from deepchecks.tabular.suites import full_suite
from evidently.metric_preset import (
DataDriftPreset, DataQualityPreset, TargetDriftPreset, RegressionPreset, ClassificationPreset
)

from evidently.report import Report
from evidently.test_preset import (
DataStabilityTestPreset, DataQualityTestPreset,
BinaryClassificationTestPreset, BinaryClassificationTopKTestPreset,
DataDriftTestPreset, MulticlassClassificationTestPreset,
RegressionTestPreset
)

from evidently.test_suite import TestSuite
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import whylogs as why
from whylogs.viz import NotebookProfileVisualizer

from ydata_profiling import ProfileReport


def run_why(df):
    """Log ``df`` with whylogs and write three visualizations to disk.

    The profile view of ``df`` is registered as both the target and the
    reference profile, so every chart compares the data with itself.
    Output paths are built from the current working directory plus a fixed
    stem (no extension is appended here).

    Args:
        df: The data frame to profile; its ``columns`` are passed as
            ``feature_name`` to the histogram and statistics renderers.
    """
    profile_view = why.log(df).view()

    viz = NotebookProfileVisualizer()
    viz.set_profiles(
        target_profile_view=profile_view,
        reference_profile_view=profile_view,
    )

    # NOTE(review): ``feature_name`` receives the whole ``df.columns`` index;
    # confirm the visualizer accepts that rather than a single name or list.
    charts = (
        ("why_profile_summary", viz.profile_summary, {}),
        ("why_double_histogram", viz.double_histogram, {"feature_name": df.columns}),
        ("why_feature_statistics", viz.feature_statistics, {"feature_name": df.columns}),
    )

    # Render and write one chart at a time, in the listed order.
    for stem, render, render_kwargs in charts:
        viz.write(
            rendered_html=render(**render_kwargs),
            html_file_name=os.getcwd() + "/" + stem,
        )

def run_evidently(df):
    """Run an Evidently test suite and metric report over ``df``.

    The first 60 rows are used as the current data and the remaining rows
    as the reference data, with no explicit column mapping. Results are
    saved to ``evidently_test_report.html`` and
    ``evidently_metric_report.html`` respectively.

    Args:
        df: The data frame to split positionally and evaluate.
    """
    split_at = 60

    # Test suite first: build, run, save.
    test_suite = TestSuite(
        tests=[
            DataStabilityTestPreset(),
            DataQualityTestPreset(),
            BinaryClassificationTestPreset(),
            DataDriftTestPreset(),
            MulticlassClassificationTestPreset(),
            RegressionTestPreset(),
        ]
    )
    test_suite.run(
        current_data=df.iloc[:split_at],
        reference_data=df.iloc[split_at:],
        column_mapping=None,
    )
    test_suite.save_html("evidently_test_report.html")

    # Then the metric report over the same positional split.
    metric_report = Report(
        metrics=[
            DataDriftPreset(),
            DataQualityPreset(),
            TargetDriftPreset(),
            RegressionPreset(),
            ClassificationPreset(),
        ]
    )
    metric_report.run(
        current_data=df.iloc[:split_at],
        reference_data=df.iloc[split_at:],
        column_mapping=None,
    )
    metric_report.save_html("evidently_metric_report.html")

def run_deepchecks(df):
    """Run the deepchecks full suite on ``df`` and save ``deepchecks.html``.

    The first 60 rows form the training split and the remaining rows the
    test split. A ``LogisticRegression`` model is fitted on the training
    split (all columns except ``"target"`` as features, ``"target"`` as the
    label) and passed to the suite together with both splits.

    Args:
        df: Data frame containing a ``"target"`` column.
    """
    suite = full_suite()

    # Plain assignments: the original chained form also bound unused
    # ``train_dataset`` / ``test_dataset`` locals.
    train_df = df.iloc[:60]
    test_df = df.iloc[60:]

    # NOTE(review): the split is positional, not shuffled, and every
    # non-"target" column (including any "prediction" column the caller
    # added) is used as a feature -- confirm this is intended.
    model = LogisticRegression().fit(
        train_df.drop(columns="target"),
        train_df["target"],
    )

    suite_result = suite.run(
        train_dataset=train_df,
        test_dataset=test_df,
        model=model,
    )
    suite_result.save_as_html("deepchecks.html")

def run_profiler(df):
    """Build a ydata-profiling report titled "Profiling Report" for ``df``.

    The report is written to ``ydata_profiling.html``.
    """
    ProfileReport(df, title="Profiling Report").to_file("ydata_profiling.html")

def main():
    """Load the iris data set and generate every report from it.

    The feature frame is extended in place with a ``"target"`` column and
    a ``"prediction"`` column, both holding the iris target values, and is
    then handed to each report generator in turn.
    """
    iris = load_iris(as_frame=True)
    labels = iris["target"]

    df = iris["data"]
    df["target"] = labels
    # The prediction column is an exact copy of the target.
    df["prediction"] = labels

    report_runners = (run_why, run_evidently, run_deepchecks, run_profiler)
    for generate in report_runners:
        generate(df)


# Generate the reports only when executed as a script.
if __name__ == "__main__":
    main()
5,134 changes: 5,134 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

31 changes: 31 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
[tool.poetry]
name = "python-data-profiler-comp"
version = "0.1.0"
description = ""
authors = ["James Bristow <[email protected]>"]
readme = "README.md"

[tool.poetry.dependencies]
python = ">=3.11,<3.12"
ydata-profiling = "^4.6.5"
pandas = "<=2.0.3"
numpy = "<1.26"
matplotlib = "<3.8.0"
statsmodels = "^0.14.1"
seaborn = "<0.13"
pydantic = "^2.6.3"
evidently = "^0.4.16"
httpx = "^0.27.0"
scikit-learn = "<=1.3.0"
plotly = "^5.19.0"
deepchecks = "<0.17.3"
scipy = "<1.11.4"
whylogs = {extras = ["viz"], version = "^1.3.24"}
great-expectations = "^0.18.10"
kaleido = "0.2.1"
pycaret = "<=3.0.4"


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
193 changes: 193 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
altair==4.2.2
annotated-types==0.6.0
anyio==4.3.0
appdirs==1.4.4
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
attrs==23.2.0
backcall==0.2.0
beautifulsoup4==4.12.3
bleach==6.1.0
blinker==1.7.0
category-encoders==2.6.3
certifi==2024.2.2
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
cloudpickle==3.0.0
colorama==0.4.6
comm==0.2.1
contourpy==1.2.0
cryptography==42.0.5
cycler==0.12.1
Cython==3.0.8
dacite==1.8.1
dash==2.15.0
dash-core-components==2.0.0
dash-html-components==2.0.0
dash-table==5.0.0
debugpy==1.8.1
decorator==5.1.1
deepchecks==0.17.2
defusedxml==0.7.1
deprecation==2.1.0
distro==1.9.0
dynaconf==3.2.4
entrypoints==0.4
evidently==0.4.16
Faker==23.3.0
fastjsonschema==2.19.1
filelock==3.13.1
Flask==3.0.2
fonttools==4.49.0
fsspec==2024.2.0
great-expectations==0.18.10
h11==0.14.0
htmlmin==0.1.12
httpcore==1.0.4
httpx==0.27.0
idna==3.6
ImageHash==4.3.1
imbalanced-learn==0.12.0
importlib-metadata==7.0.1
ipykernel==6.27.1
ipython==7.34.0
ipython-genutils==0.2.0
ipywidgets==7.8.1
iterative-telemetry==0.0.8
itsdangerous==2.1.2
jedi==0.19.1
Jinja2==3.1.3
joblib==1.3.2
jsonpatch==1.33
jsonpickle==3.0.3
jsonpointer==2.4
jsonschema==4.21.1
jsonschema-specifications==2023.12.1
jupyter_client==8.6.0
jupyter_core==5.7.1
jupyter-server==1.16.0
jupyterlab_pygments==0.3.0
jupyterlab-widgets==1.1.7
kaleido==0.2.1
kiwisolver==1.4.5
lightgbm==4.3.0
litestar==2.6.1
llvmlite==0.41.1
makefun==1.15.2
markdown-it-py==3.0.0
MarkupSafe==2.1.5
marshmallow==3.21.0
matplotlib==3.7.5
matplotlib-inline==0.1.6
mdurl==0.1.2
mistune==3.0.2
msgspec==0.18.6
multidict==6.0.5
multimethod==1.11.2
mypy-extensions==1.0.0
nbclassic==1.0.0
nbclient==0.9.0
nbconvert==7.16.1
nbformat==5.9.2
nest-asyncio==1.6.0
networkx==3.2.1
nltk==3.8.1
notebook==6.5.4
notebook_shim==0.2.4
numba==0.58.1
numpy==1.24.4
orjson==3.9.15
packaging==23.2
pandas==1.5.3
pandocfilters==1.5.1
parso==0.8.3
patsy==0.5.6
pexpect==4.9.0
phik==0.12.4
pickleshare==0.7.5
Pillow==9.5.0
pip==23.3.1
platformdirs==3.11.0
plotly==5.19.0
plotly-resampler==0.9.2
pmdarima==2.0.4
polyfactory==2.14.1
prometheus_client==0.20.0
prompt-toolkit==3.0.43
protobuf==4.25.3
psutil==5.9.8
ptyprocess==0.7.0
pybars3==0.9.7
pycaret==3.0.0
pycparser==2.21
pydantic==2.6.3
pydantic_core==2.16.3
Pygments==2.17.2
PyMeta3==0.5.1
PyNomaly==0.3.3
pyod==1.1.3
pyparsing==3.1.1
python-dateutil==2.9.0
python-utils==3.8.2
pytz==2024.1
PyWavelets==1.5.0
PyYAML==6.0.1
pyzmq==23.2.1
referencing==0.33.0
regex==2023.12.25
requests==2.31.0
retrying==1.3.4
rich==13.7.1
rich-click==1.7.3
rpds-py==0.18.0
ruamel.yaml==0.17.17
schemdraw==0.18
scikit-base==0.7.3
scikit-learn==1.3.0
scikit-plot==0.3.7
scipy==1.11.3
seaborn==0.12.2
Send2Trash==1.8.2
setuptools==69.1.1
six==1.16.0
sktime==0.27.0
sniffio==1.3.1
soupsieve==2.5
statsmodels==0.14.1
tangled-up-in-unicode==0.2.0
tbats==1.1.3
tenacity==8.2.3
terminado==0.18.0
threadpoolctl==3.3.0
tinycss2==1.2.1
toolz==0.12.1
tornado==6.4
tqdm==4.66.2
traitlets==5.14.1
tsdownsample==0.1.2
typeguard==4.1.5
typer==0.9.0
types-requests==2.31.0.20240218
typing_extensions==4.10.0
typing-inspect==0.9.0
tzdata==2024.1
tzlocal==5.2
urllib3==2.2.1
uvicorn==0.27.1
visions==0.7.5
watchdog==4.0.0
wcwidth==0.2.13
webencodings==0.5.1
websocket-client==1.7.0
Werkzeug==3.0.1
whylabs-client==0.5.10
whylogs==1.3.24
whylogs-sketching==3.4.1.dev3
widgetsnbextension==3.6.6
wordcloud==1.9.3
wurlitzer==3.0.3
xxhash==3.4.1
ydata-profiling==4.6.5
yellowbrick==1.5
zipp==3.17.0
Loading

0 comments on commit a50e569

Please sign in to comment.