From 200a3ca557925278d1900fb5cb9d489a5181b271 Mon Sep 17 00:00:00 2001 From: Dazhong Xia Date: Fri, 10 Nov 2023 09:38:05 -0500 Subject: [PATCH] Increase test coverage / add path/URI distinction in PudlPaths() --- .../work-in-progress/CEMS_by_utility.ipynb | 2 +- .../work-in-progress/better-heatrates.ipynb | 2 +- .../work-in-progress/ferc714-output.ipynb | 2 +- .../work-in-progress/jupyterhub-test.ipynb | 2 +- notebooks/work-in-progress/state-demand.ipynb | 2 +- src/pudl/extract/xbrl.py | 5 +- src/pudl/workspace/setup.py | 15 ++-- test/unit/extract/xbrl_test.py | 79 +++++++++++++++++++ .../notebooks/validate_bf_eia923.ipynb | 2 +- .../notebooks/validate_fbp_ferc1.ipynb | 2 +- .../notebooks/validate_frc_eia923.ipynb | 2 +- .../notebooks/validate_fuel_ferc1.ipynb | 2 +- .../notebooks/validate_gens_eia860.ipynb | 2 +- .../notebooks/validate_gf_eia923.ipynb | 2 +- test/validate/notebooks/validate_mcoe.ipynb | 2 +- .../validate_plants_steam_ferc1.ipynb | 2 +- 16 files changed, 102 insertions(+), 23 deletions(-) diff --git a/notebooks/work-in-progress/CEMS_by_utility.ipynb b/notebooks/work-in-progress/CEMS_by_utility.ipynb index c8a085ac32..1d2a593dcc 100644 --- a/notebooks/work-in-progress/CEMS_by_utility.ipynb +++ b/notebooks/work-in-progress/CEMS_by_utility.ipynb @@ -47,7 +47,7 @@ "from pudl.workspace.setup import PudlPaths\n", "\n", "\n", - "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db_uri(\"ferc1\"))\n", "\n", "pudl_engine = sa.create_engine(PudlPaths().pudl_db())\n", "#display(pudl_engine)\n", diff --git a/notebooks/work-in-progress/better-heatrates.ipynb b/notebooks/work-in-progress/better-heatrates.ipynb index 4547c5ba3d..3be8d324e5 100644 --- a/notebooks/work-in-progress/better-heatrates.ipynb +++ b/notebooks/work-in-progress/better-heatrates.ipynb @@ -324,7 +324,7 @@ "from pudl.workspace.setup import PudlPaths\n", "\n", "# TODO(janrous): provide property for accessing ferc db?\n", - "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db_uri(\"ferc1\"))\n", "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "\n", "API_KEY_EIA = os.environ[\"API_KEY_EIA\"]\n", diff --git a/notebooks/work-in-progress/ferc714-output.ipynb b/notebooks/work-in-progress/ferc714-output.ipynb index 396a764b19..65cc1da40e 100644 --- a/notebooks/work-in-progress/ferc714-output.ipynb +++ b/notebooks/work-in-progress/ferc714-output.ipynb @@ -142,7 +142,7 @@ "source": [ "from pudl.workspace.setup import PudlPaths\n", "\n", - "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db_uri(\"ferc1\"))\n", "display(ferc1_engine)\n", "\n", "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", diff --git a/notebooks/work-in-progress/jupyterhub-test.ipynb b/notebooks/work-in-progress/jupyterhub-test.ipynb index 18d0e9a037..3a59d03a7a 100644 --- a/notebooks/work-in-progress/jupyterhub-test.ipynb +++ b/notebooks/work-in-progress/jupyterhub-test.ipynb @@ -51,7 +51,7 @@ "source": [ "from pudl.workspace.setup import PudlPaths\n", "\n", - "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db_uri(\"ferc1\"))\n", "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine=pudl_engine)" ] diff --git a/notebooks/work-in-progress/state-demand.ipynb b/notebooks/work-in-progress/state-demand.ipynb index d319093948..a3158054c0 100644 --- a/notebooks/work-in-progress/state-demand.ipynb +++ b/notebooks/work-in-progress/state-demand.ipynb @@ -113,7 +113,7 @@ "#HARVEST_ACCOUNT_ID = os.environ[\"HARVEST_ACCOUNT_ID\"]\n", "\n", "from pudl.workspace.setup import PudlPaths\n", - "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db_uri(\"ferc1\"))\n", "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine=pudl_engine)" ] diff --git a/src/pudl/extract/xbrl.py b/src/pudl/extract/xbrl.py index 1288fa8878..108fa5e025 100644 --- a/src/pudl/extract/xbrl.py +++ b/src/pudl/extract/xbrl.py @@ -2,7 +2,6 @@ import io from datetime import date from pathlib import Path -from urllib.parse import urlparse from dagster import Field, Noneable, op from ferc_xbrl_extractor.cli import run_main @@ -86,9 +85,7 @@ def xbrl2sqlite(context) -> None: logger.info(f"Dataset ferc{form}_xbrl is disabled, skipping") continue - sql_path = Path( - urlparse(PudlPaths().sqlite_db(f"ferc{form.value}_xbrl")).path - ).resolve() + sql_path = PudlPaths().sqlite_db_path(f"ferc{form.value}_xbrl") if sql_path.exists(): if clobber: diff --git a/src/pudl/workspace/setup.py b/src/pudl/workspace/setup.py index 7cf93b3cb7..6afaa751ea 100644 --- a/src/pudl/workspace/setup.py +++ b/src/pudl/workspace/setup.py @@ -73,20 +73,23 @@ def data_dir(self) -> Path: return self.input_dir @property - def pudl_db(self) -> Path: + def pudl_db(self) -> str: """Returns url of locally stored pudl sqlite database.""" - return self.sqlite_db("pudl") + return self.sqlite_db_uri("pudl") - def sqlite_db(self, name: str) -> str: - """Returns url of locally stored pudl slqlite database with given name. + def sqlite_db_uri(self, name: str) -> str: + """Returns url of locally stored pudl sqlite database with given name. The name is expected to be the name of the database without the .sqlite suffix. E.g. pudl, ferc1 and so on. """ - db_path = self.output_dir / f"{name}.sqlite" # SQLite URI has 3 slashes - 2 to separate URI scheme, 1 to separate creds # sqlite://{credentials}/{db_path} - return f"sqlite:///{db_path}" + return f"sqlite:///{self.sqlite_db_path(name)}" + + def sqlite_db_path(self, name: str) -> Path: + """Return path to locally stored SQLite DB file.""" + return self.output_dir / f"{name}.sqlite" def output_file(self, filename: str) -> Path: """Path to file in PUDL output directory.""" diff --git a/test/unit/extract/xbrl_test.py b/test/unit/extract/xbrl_test.py index 8e74069630..0aeb2a920d 100644 --- a/test/unit/extract/xbrl_test.py +++ b/test/unit/extract/xbrl_test.py @@ -128,6 +128,85 @@ def test_xbrl2sqlite(settings, forms, mocker): ) +def test_xbrl2sqlite_db_exists_no_clobber(mocker): + convert_form_mock = mocker.MagicMock() + mocker.patch("pudl.extract.xbrl.convert_form", new=convert_form_mock) + + # Mock datastore object to allow comparison + mock_datastore = mocker.MagicMock() + mocker.patch("pudl.extract.xbrl.FercXbrlDatastore", return_value=mock_datastore) + + ferc1_sqlite_path = PudlPaths().output_dir / "ferc1_xbrl.sqlite" + ferc1_sqlite_path.touch() + settings = FercToSqliteSettings( + ferc1_dbf_to_sqlite_settings=Ferc1DbfToSqliteSettings(), + ferc1_xbrl_to_sqlite_settings=Ferc1XbrlToSqliteSettings(), + ferc2_xbrl_to_sqlite_settings=None, + ferc6_xbrl_to_sqlite_settings=None, + ferc60_xbrl_to_sqlite_settings=None, + ferc714_xbrl_to_sqlite_settings=None, + ) + # Construct xbrl2sqlite op context + context = build_op_context( + resources={ + "ferc_to_sqlite_settings": settings, + "datastore": "datastore", + }, + config={ + "workers": 10, + "batch_size": 20, + "clobber": False, + }, + ) + + with pytest.raises(RuntimeError, match="Found existing DB"): + xbrl2sqlite(context) + + +def test_xbrl2sqlite_db_exists_yes_clobber(mocker): + convert_form_mock = mocker.MagicMock() + mocker.patch("pudl.extract.xbrl.convert_form", new=convert_form_mock) + + # Mock datastore object to allow comparison + mock_datastore = mocker.MagicMock() + mocker.patch("pudl.extract.xbrl.FercXbrlDatastore", return_value=mock_datastore) + + ferc1_sqlite_path = PudlPaths().output_dir / "ferc1_xbrl.sqlite" + ferc1_sqlite_path.touch() + + # mock the db path so we can assert it gets clobbered + mock_db_path = mocker.MagicMock(spec=ferc1_sqlite_path) + mock_pudl_paths = mocker.MagicMock( + spec=PudlPaths(), sqlite_db_path=lambda _x: mock_db_path + ) + mocker.patch("pudl.extract.xbrl.PudlPaths", return_value=mock_pudl_paths) + + settings = FercToSqliteSettings( + ferc1_dbf_to_sqlite_settings=Ferc1DbfToSqliteSettings(), + ferc1_xbrl_to_sqlite_settings=Ferc1XbrlToSqliteSettings(), + ferc2_xbrl_to_sqlite_settings=None, + ferc6_xbrl_to_sqlite_settings=None, + ferc60_xbrl_to_sqlite_settings=None, + ferc714_xbrl_to_sqlite_settings=None, + ) + + context = build_op_context( + resources={ + "ferc_to_sqlite_settings": settings, + "datastore": "datastore", + }, + config={ + "workers": 10, + "batch_size": 20, + "clobber": True, + }, + ) + + xbrl2sqlite(context) + + mock_db_path.unlink.assert_any_call() + + def test_convert_form(mocker): """Test convert_form method is properly calling extractor.""" extractor_mock = mocker.MagicMock() diff --git a/test/validate/notebooks/validate_bf_eia923.ipynb b/test/validate/notebooks/validate_bf_eia923.ipynb index 750b0e926e..c8ae9e9ad6 100644 --- a/test/validate/notebooks/validate_bf_eia923.ipynb +++ b/test/validate/notebooks/validate_bf_eia923.ipynb @@ -79,7 +79,7 @@ "outputs": [], "source": [ "from pudl.workspace.setup import PudlPaths\n", - "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db_uri(\"ferc1\"))\n", "pudl_engine = sa.create_engine(PudlPaths().pudl_db)" ] }, diff --git a/test/validate/notebooks/validate_fbp_ferc1.ipynb b/test/validate/notebooks/validate_fbp_ferc1.ipynb index fb4fd2920d..19bea73485 100644 --- a/test/validate/notebooks/validate_fbp_ferc1.ipynb +++ b/test/validate/notebooks/validate_fbp_ferc1.ipynb @@ -81,7 +81,7 @@ "outputs": [], "source": [ "from pudl.workspace.setup import PudlPaths\n", - "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db_uri(\"ferc1\"))\n", "pudl_engine = sa.create_engine(PudlPaths().pudl_db)" ] }, diff --git a/test/validate/notebooks/validate_frc_eia923.ipynb b/test/validate/notebooks/validate_frc_eia923.ipynb index e1834129b7..63bf3862c4 100644 --- a/test/validate/notebooks/validate_frc_eia923.ipynb +++ b/test/validate/notebooks/validate_frc_eia923.ipynb @@ -79,7 +79,7 @@ "outputs": [], "source": [ "from pudl.workspace.setup import PudlPaths\n", - "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db_uri(\"ferc1\"))\n", "pudl_engine = sa.create_engine(PudlPaths().pudl_db)" ] }, diff --git a/test/validate/notebooks/validate_fuel_ferc1.ipynb b/test/validate/notebooks/validate_fuel_ferc1.ipynb index cc86703c20..7cc54302b8 100644 --- a/test/validate/notebooks/validate_fuel_ferc1.ipynb +++ b/test/validate/notebooks/validate_fuel_ferc1.ipynb @@ -79,7 +79,7 @@ "outputs": [], "source": [ "from pudl.workspace.setup import PudlPaths\n", - "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db("ferc1"))\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db_uri("ferc1"))\n", "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "pudl_settings" ] diff --git a/test/validate/notebooks/validate_gens_eia860.ipynb b/test/validate/notebooks/validate_gens_eia860.ipynb index 17b2916e44..59ad58531d 100644 --- a/test/validate/notebooks/validate_gens_eia860.ipynb +++ b/test/validate/notebooks/validate_gens_eia860.ipynb @@ -78,7 +78,7 @@ "outputs": [], "source": [ "from pudl.workspace.setup import PudlPaths\n", - "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db("ferc1"))\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db_uri("ferc1"))\n", "pudl_engine = sa.create_engine(PudlPaths().pudl_db)" ] }, diff --git a/test/validate/notebooks/validate_gf_eia923.ipynb b/test/validate/notebooks/validate_gf_eia923.ipynb index 216d07e8b8..9dede77d30 100644 --- a/test/validate/notebooks/validate_gf_eia923.ipynb +++ b/test/validate/notebooks/validate_gf_eia923.ipynb @@ -78,7 +78,7 @@ "outputs": [], "source": [ "from pudl.workspace.setup import PudlPaths\n", - "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db("ferc1"))\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db_uri("ferc1"))\n", "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "pudl_settings" ] diff --git a/test/validate/notebooks/validate_mcoe.ipynb b/test/validate/notebooks/validate_mcoe.ipynb index e8c884f558..4b948fc073 100644 --- a/test/validate/notebooks/validate_mcoe.ipynb +++ b/test/validate/notebooks/validate_mcoe.ipynb @@ -79,7 +79,7 @@ "outputs": [], "source": [ "from pudl.workspace.setup import PudlPaths\n", - "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db("ferc1"))\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db_uri("ferc1"))\n", "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "pudl_settings" ] diff --git a/test/validate/notebooks/validate_plants_steam_ferc1.ipynb b/test/validate/notebooks/validate_plants_steam_ferc1.ipynb index e435974dc0..0004d00081 100644 --- a/test/validate/notebooks/validate_plants_steam_ferc1.ipynb +++ b/test/validate/notebooks/validate_plants_steam_ferc1.ipynb @@ -79,7 +79,7 @@ "outputs": [], "source": [ "from pudl.workspace.setup import PudlPaths\n", - "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db("ferc1"))\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db_uri("ferc1"))\n", "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "pudl_settings" ]