diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index b808b84c42..79088f646a 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -43,6 +43,9 @@ jobs:
           conda config --show
           printenv | sort

+      - name: Make input, output and dagster dirs
+        run: mkdir -p ${{ env.PUDL_OUTPUT }} ${{ env.PUDL_INPUT }} ${{ env.DAGSTER_HOME }}
+
       - name: Lint and build PUDL documentation with Sphinx
         run: |
           pip install --no-deps --editable .
@@ -82,6 +85,9 @@ jobs:
           conda config --show
           printenv | sort

+      - name: Make input, output and dagster dirs
+        run: mkdir -p ${{ env.PUDL_OUTPUT }} ${{ env.PUDL_INPUT }} ${{ env.DAGSTER_HOME }}
+
       - name: Log SQLite3 version
         run: |
           which sqlite3
diff --git a/.github/workflows/zenodo-cache-sync.yml b/.github/workflows/zenodo-cache-sync.yml
index 10564ede56..22da3fa684 100644
--- a/.github/workflows/zenodo-cache-sync.yml
+++ b/.github/workflows/zenodo-cache-sync.yml
@@ -41,8 +41,6 @@ jobs:

       - name: Checkout desired branch
         uses: actions/checkout@v4
-        with:
-          ref: ${{ env.GITHUB_REF }}

       - name: Install Conda environment using mamba
         uses: mamba-org/setup-micromamba@v1
diff --git a/.gitignore b/.gitignore
index 173bc5da8b..a582a409d5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,8 @@ docs/data_dictionaries/pudl_db.rst
 .ipynb_checkpoints/
 .cache/
+.ruff_cache/
+.mypy_cache/
 .pytest_cache/*
 .DS_Store
 build/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 20c72d2024..614d22aaea 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -16,11 +16,14 @@ repos:
       - id: check-added-large-files # Don't accidentally commit giant files.
       - id: check-merge-conflict # Watch for lingering merge markers.
       - id: check-yaml # Validate all YAML files.
+      - id: check-toml # Validate all TOML files.
       - id: check-case-conflict # Avoid case sensitivity in file names.
       - id: debug-statements # Watch for lingering debugger calls.
       - id: mixed-line-ending # Use Unix line-endings to avoid big no-op CSV diffs.
        args: ["--fix=lf"]
+      - id: trailing-whitespace # Strip trailing whitespace.
       - id: name-tests-test # Follow PyTest naming convention.
+      - id: end-of-file-fixer # Ensure files end with a single newline.

 ####################################################################################
 # Formatters: hooks that re-write Python & documentation files
diff --git a/Makefile b/Makefile
index 68a91bc709..bd98c22fe8 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@ etl_fast_yml := src/pudl/package_data/settings/etl_fast.yml
 etl_full_yml := src/pudl/package_data/settings/etl_full.yml

 # We use mamba locally, but micromamba in CI, so choose the right binary:
-ifdef GITHUB_ACTION
+ifdef GITHUB_ACTIONS
 mamba := micromamba
 else
 mamba := mamba
@@ -39,7 +39,7 @@ conda-clean:

 # Regenerate the conda lockfile and render platform specific conda environments.
 conda-lock.yml: pyproject.toml
-	${mamba} run --name base ${mamba} install --yes conda-lock prettier
+	${mamba} run --name base ${mamba} install --quiet --yes conda-lock prettier
 	${mamba} run --name base conda-lock \
 		--${mamba} \
 		--file=pyproject.toml \
@@ -53,6 +53,7 @@ conda-lock.yml: pyproject.toml
 # Create the pudl-dev conda environment based on the universal lockfile
 .PHONY: pudl-dev
 pudl-dev:
+	${mamba} run --name base ${mamba} install --quiet --yes conda-lock
 	${mamba} run --name base ${mamba} env remove --name pudl-dev
 	${mamba} run --name base conda-lock install \
 		--name pudl-dev \
diff --git a/README.rst b/README.rst
index 8eea0e93b4..f56642a3af 100644
--- a/README.rst
+++ b/README.rst
@@ -52,11 +52,11 @@ What data is available?
PUDL currently integrates data from: -* `EIA Form 860 `__: 2001-2022 +* `EIA Form 860 `__: 2001 - 2022 * `EIA Form 860m `__: 2023-06 -* `EIA Form 861 `__: 2001-2022 -* `EIA Form 923 `__: 2001-2022 -* `EPA Continuous Emissions Monitoring System (CEMS) `__: 1995-2022 +* `EIA Form 861 `__: 2001 - 2022 +* `EIA Form 923 `__: 2001 - 2023-08 +* `EPA Continuous Emissions Monitoring System (CEMS) `__: 1995 - 2022 * `FERC Form 1 `__: 1994-2021 * `FERC Form 714 `__: 2006-2020 * `US Census Demographic Profile 1 Geodatabase `__: 2010 diff --git a/devtools/datasette/fly/fly.toml b/devtools/datasette/fly/fly.toml index 4b8923dacf..0c982386e4 100644 --- a/devtools/datasette/fly/fly.toml +++ b/devtools/datasette/fly/fly.toml @@ -31,4 +31,4 @@ primary_region = "bos" timeout = 2000 [deploy] -wait_timeout = "15m" \ No newline at end of file +wait_timeout = "15m" diff --git a/devtools/datasette/fly/run.sh b/devtools/datasette/fly/run.sh index 9516d73d7a..c17f3bdc86 100755 --- a/devtools/datasette/fly/run.sh +++ b/devtools/datasette/fly/run.sh @@ -7,4 +7,4 @@ find /data/ -name '*.sqlite' -delete mv all_dbs.tar.zst /data zstd -f -d /data/all_dbs.tar.zst -o /data/all_dbs.tar tar -xf /data/all_dbs.tar --directory /data -datasette serve --host 0.0.0.0 /data/*.sqlite --cors --inspect-file inspect-data.json --metadata metadata.yml --setting sql_time_limit_ms 5000 --port $PORT \ No newline at end of file +datasette serve --host 0.0.0.0 /data/*.sqlite --cors --inspect-file inspect-data.json --metadata metadata.yml --setting sql_time_limit_ms 5000 --port $PORT diff --git a/devtools/datasette/publish.py b/devtools/datasette/publish.py index a5b3b3123f..f9197fdbeb 100644 --- a/devtools/datasette/publish.py +++ b/devtools/datasette/publish.py @@ -17,13 +17,15 @@ Apart from that: the Dockerfile and dataset-specific metadata.yml/inspect-data.json are generated by this script. """ - import json import logging import secrets +import sys from pathlib import Path from subprocess import check_call, check_output +import click + from pudl.metadata.classes import DatasetteMetadata from pudl.workspace.setup import PudlPaths @@ -46,7 +48,7 @@ """ -def make_dockerfile(): +def make_dockerfile() -> str: """Write a dockerfile from template, to use in fly deploy. We write this from template so we can generate a datasette secret. This way @@ -56,7 +58,7 @@ def make_dockerfile(): return DOCKERFILE_TEMPLATE.format(datasette_secret=datasette_secret) -def inspect_data(datasets, pudl_out): +def inspect_data(datasets: list[str], pudl_out: Path) -> str: """Pre-inspect databases to generate some metadata for Datasette. 
    This is done in the image build process in datasette-publish-fly, but since
@@ -80,43 +82,99 @@ def inspect_data(datasets, pudl_out):
     return inspect_output


-def metadata(pudl_out) -> str:
+def metadata(pudl_out: Path) -> str:
     """Return human-readable metadata for Datasette."""
     return DatasetteMetadata.from_data_source_ids(pudl_out).to_yaml()


-def main():
+@click.command(context_settings={"help_option_names": ["-h", "--help"]})
+@click.option(
+    "--fly",
+    "-f",
+    "deploy",
+    flag_value="fly",
+    help="Deploy Datasette to fly.io.",
+    default=True,
+)
+@click.option(
+    "--local",
+    "-l",
+    "deploy",
+    flag_value="local",
+    help="Deploy Datasette locally for testing or debugging purposes.",
+)
+@click.option(
+    "--metadata",
+    "-m",
+    "deploy",
+    flag_value="metadata",
+    help="Generate the Datasette metadata.yml in current directory, but do not deploy.",
+)
+def deploy_datasette(deploy: str) -> int:
     """Generate deployment files and run the deploy."""
-    fly_dir = Path(__file__).parent.absolute() / "fly"
-    docker_path = fly_dir / "Dockerfile"
-    inspect_path = fly_dir / "inspect-data.json"
-    metadata_path = fly_dir / "metadata.yml"
-    pudl_out = PudlPaths().pudl_output
-    datasets = [str(p.name) for p in pudl_out.glob("*.sqlite")]
-    logging.info(f"Inspecting DBs for datasette: {datasets}...")
-    inspect_output = inspect_data(datasets, pudl_out)
-    with inspect_path.open("w") as f:
-        f.write(json.dumps(inspect_output))
-
-    logging.info("Writing metadata...")
-    with metadata_path.open("w") as f:
-        f.write(metadata(pudl_out))
-
-    logging.info("Writing Dockerfile...")
-    with docker_path.open("w") as f:
-        f.write(make_dockerfile())
-
-    logging.info(f"Compressing {datasets} and putting into docker context...")
-    check_call(
-        ["tar", "-a", "-czvf", fly_dir / "all_dbs.tar.zst"] + datasets,  # noqa: S603
-        cwd=pudl_out,
+    pudl_out = PudlPaths().pudl_output
+    metadata_yml = metadata(pudl_out)
+    # Order the databases to highlight PUDL
+    datasets = (
+        ["pudl.sqlite"]
+        + sorted(str(p.name) for p in pudl_out.glob("ferc*.sqlite"))
+        + ["censusdp1tract.sqlite"]
     )
-    logging.info("Running fly deploy...")
-    check_call(["/usr/bin/env", "flyctl", "deploy"], cwd=fly_dir)  # noqa: S603
-    logging.info("Deploy finished!")
+    if deploy == "fly":
+        logging.info("Deploying to fly.io...")
+        fly_dir = Path(__file__).parent.absolute() / "fly"
+        docker_path = fly_dir / "Dockerfile"
+        inspect_path = fly_dir / "inspect-data.json"
+        metadata_path = fly_dir / "metadata.yml"
+
+        logging.info(f"Inspecting DBs for datasette: {datasets}...")
+        inspect_output = inspect_data(datasets, pudl_out)
+        with inspect_path.open("w") as f:
+            f.write(json.dumps(inspect_output))
+
+        logging.info(f"Writing Datasette metadata to: {metadata_path}")
+        with metadata_path.open("w") as f:
+            f.write(metadata_yml)
+
+        logging.info("Writing Dockerfile...")
+        with docker_path.open("w") as f:
+            f.write(make_dockerfile())
+
+        logging.info(f"Compressing {datasets} and putting into docker context...")
+        check_call(
+            ["tar", "-a", "-czvf", fly_dir / "all_dbs.tar.zst"] + datasets,  # noqa: S603
+            cwd=pudl_out,
+        )
+
+        logging.info("Running fly deploy...")
+        check_call(["/usr/bin/env", "flyctl", "deploy"], cwd=fly_dir)  # noqa: S603
+        logging.info("Deploy finished!")
+
+    elif deploy == "local":
+        logging.info("Running Datasette locally...")
+        metadata_path = pudl_out / "metadata.yml"
+        logging.info(f"Writing Datasette metadata to: {metadata_path}")
+        with metadata_path.open("w") as f:
+            f.write(metadata_yml)
+
+        check_call(
+            ["/usr/bin/env", "datasette", "serve", "-m", "metadata.yml"] + datasets,  # noqa: S603
+            cwd=pudl_out,
+        )
+
+    elif deploy == "metadata":
+        metadata_path = Path.cwd() / "metadata.yml"
+        logging.info(f"Writing Datasette metadata to: {metadata_path}")
+        with metadata_path.open("w") as f:
+            f.write(metadata_yml)
+
+    else:
+        logging.error(f"Unrecognized deployment destination: {deploy=}")
+        return 1
+
+    return 0


 if __name__ == "__main__":
-    main()
+    sys.exit(deploy_datasette())
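+
+# Usage sketch (hypothetical invocations, not executed on import): the three
+# mutually exclusive flags above all write to the single "deploy" parameter via
+# click's flag_value mechanism, with --fly as the default. For example:
+#
+#   python publish.py            # same as --fly: build and deploy to fly.io
+#   python publish.py --local    # serve the SQLite DBs locally with Datasette
+#   python publish.py --metadata # only write metadata.yml to the current dir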
diff --git a/devtools/materialize_asset.py b/devtools/materialize_asset.py
index 99f3729883..ee3aa1f9ce 100755
--- a/devtools/materialize_asset.py
+++ b/devtools/materialize_asset.py
@@ -38,7 +38,7 @@ def main(asset_id):
         config={
             "resources": {
                 "dataset_settings": {
-                    "config": etl_fast_settings.dict(),
+                    "config": etl_fast_settings.model_dump(),
                 },
             },
         },
diff --git a/devtools/sqlite_to_duckdb.py b/devtools/sqlite_to_duckdb.py
new file mode 100644
index 0000000000..da49084829
--- /dev/null
+++ b/devtools/sqlite_to_duckdb.py
@@ -0,0 +1,69 @@
+"""A naive script for converting SQLite to DuckDB."""
+import logging
+from pathlib import Path
+
+import click
+import duckdb
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+@click.command()
+@click.argument("sqlite_path", type=click.Path(exists=True, resolve_path=True))
+@click.argument(
+    "duckdb_path", type=click.Path(resolve_path=True, writable=True, allow_dash=False)
+)
+def convert_sqlite_to_duckdb(sqlite_path, duckdb_path):
+    """Convert an SQLite database to DuckDB format.
+
+    Args:
+        sqlite_path (str): Path to the existing SQLite database file.
+        duckdb_path (str): Path to the new DuckDB database file (should not exist).
+
+    Example:
+        python sqlite_to_duckdb.py sqlite.db duckdb.db
+    """
+    sqlite_path = Path(sqlite_path)
+    duckdb_path = Path(duckdb_path)
+
+    # Check if DuckDB file already exists
+    if duckdb_path.exists():
+        click.echo(
+            f"Error: DuckDB file '{duckdb_path}' already exists. Please provide a new filename."
+ ) + return + + # Connect to DuckDB database + duckdb_conn = duckdb.connect(database=str(duckdb_path)) + duckdb_cursor = duckdb_conn.cursor() + + # Fetch table names from SQLite database using DuckDB + duckdb_cursor.execute(f"ATTACH DATABASE '{sqlite_path}' AS sqlite_db;") + duckdb_cursor.execute("SELECT name FROM main.sqlite_master WHERE type='table';") + table_names = [row[0] for row in duckdb_cursor.fetchall()] + + # Copy tables from SQLite to DuckDB + for table_name in table_names: + logger.info(f"Working on table: {table_name}") + # Fetch column names and types from SQLite table using DuckDB + duckdb_cursor.execute(f"PRAGMA table_info(sqlite_db.{table_name});") + columns_info = duckdb_cursor.fetchall() + column_definitions = ", ".join([f"{col[1]} {col[2]}" for col in columns_info]) + + # Create equivalent table in DuckDB + duckdb_cursor.execute(f"CREATE TABLE {table_name} ({column_definitions});") + + # Copy data from SQLite to DuckDB using DuckDB + duckdb_cursor.execute( + f"INSERT INTO {table_name} SELECT * FROM sqlite_db.{table_name};" # noqa: S608 + ) + + # Commit and close connections + duckdb_conn.commit() + duckdb_conn.close() + + +if __name__ == "__main__": + convert_sqlite_to_duckdb() diff --git a/docker/Dockerfile b/docker/Dockerfile index ca753ed347..9247f98c78 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,14 +1,14 @@ -FROM mambaorg/micromamba:1.5.1 +FROM mambaorg/micromamba:1.5.3 USER root SHELL [ "/bin/bash", "-exo", "pipefail", "-c" ] -# Install curl and js +# Install some linux packages # awscli requires unzip, less, groff and mandoc # hadolint ignore=DL3008 RUN apt-get update && \ - apt-get install --no-install-recommends -y git curl jq unzip less groff mandoc && \ + apt-get install --no-install-recommends -y git jq unzip less groff mandoc && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -16,20 +16,11 @@ RUN apt-get update && \ # hadolint ignore=DL3059 RUN printf '[GoogleCompute]\nservice_account = default' > /etc/boto.cfg -# Install awscli2 -RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \ - unzip awscliv2.zip && \ - ./aws/install - # Switch back to being non-root user and get into the home directory USER $MAMBA_USER ENV CONTAINER_HOME=/home/$MAMBA_USER WORKDIR ${CONTAINER_HOME} -# Install flyctl -RUN curl -L https://fly.io/install.sh | sh -ENV PATH="${CONTAINER_HOME}/.fly/bin:$PATH" - ENV CONDA_PREFIX=${CONTAINER_HOME}/env ENV PUDL_REPO=${CONTAINER_HOME}/pudl ENV CONDA_RUN="micromamba run --prefix ${CONDA_PREFIX}" @@ -61,5 +52,18 @@ RUN --mount=type=bind,source=.git,target=${PUDL_REPO}/.git \ # Run the PUDL setup script so we know where to read and write data ${CONDA_RUN} pudl_setup + +# Install awscli2 +# Change back to root because the install script needs access to /usr/local/aws-cli +USER root +RUN ${CONDA_RUN} bash -c 'curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && unzip awscliv2.zip && ./aws/install' +USER $MAMBA_USER + +# Install flyctl +# hadolint ignore=DL3059 +RUN ${CONDA_RUN} bash -c 'curl -L https://fly.io/install.sh | sh' +ENV PATH="${CONTAINER_HOME}/.fly/bin:$PATH" + + # Run the unit tests: CMD ["micromamba", "run", "--prefix", "${CONDA_PREFIX}", "--attach", "''", "pytest", "test/unit"] diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml deleted file mode 100644 index 83e5c38a71..0000000000 --- a/docker/docker-compose.yml +++ /dev/null @@ -1,36 +0,0 @@ -# To rebuild and run this container: -# docker compose build && docker compose up -d 
&& docker logs -f pudl_etl - -volumes: - pudl-in: - pudl-out: - dagster-home: - -services: - pudl-etl: - platform: linux/x86_64 # Need to specify x84 because M1 chips default to use ARM: https://stackoverflow.com/questions/68630526/lib64-ld-linux-x86-64-so-2-no-such-file-or-directory-error - environment: - - GCP_BILLING_PROJECT - - FLY_ACCESS_TOKEN - env_file: - - .env - build: - context: ../ - dockerfile: docker/Dockerfile - image: catalystcoop/pudl-etl:local-dev - volumes: - - type: volume - source: pudl-in - target: ${PUDL_INPUT} - consistency: delegated - - type: volume - source: pudl-out - target: ${PUDL_OUTPUT} - consistency: delegated - - type: volume - source: dagster-home - target: ${DAGSTER_HOME} - consistency: delegated - logging: - driver: local - command: ${CONDA_RUN} bash ./docker/local_pudl_etl.sh diff --git a/docker/gcp_pudl_etl.sh b/docker/gcp_pudl_etl.sh index 7249e8202c..df44bcc060 100644 --- a/docker/gcp_pudl_etl.sh +++ b/docker/gcp_pudl_etl.sh @@ -95,17 +95,23 @@ ETL_SUCCESS=${PIPESTATUS[0]} # if pipeline is successful, distribute + publish datasette if [[ $ETL_SUCCESS == 0 ]]; then - # Dump outputs to s3 bucket if branch is dev or build was triggered by a tag - if [ $GITHUB_ACTION_TRIGGER = "push" ] || [ $GITHUB_REF = "dev" ]; then - copy_outputs_to_distribution_bucket - ETL_SUCCESS=${PIPESTATUS[0]} - fi - # Deploy the updated data to datasette if [ $GITHUB_REF = "dev" ]; then python ~/devtools/datasette/publish.py 2>&1 | tee -a $LOGFILE ETL_SUCCESS=${PIPESTATUS[0]} fi + + # Compress the SQLite DBs for easier distribution + # Remove redundant multi-file EPA CEMS outputs prior to distribution + gzip --verbose $PUDL_OUTPUT/*.sqlite && \ + rm -rf $PUDL_OUTPUT/hourly_emissions_epacems/ + ETL_SUCCESS=${PIPESTATUS[0]} + + # Dump outputs to s3 bucket if branch is dev or build was triggered by a tag + if [ $GITHUB_ACTION_TRIGGER = "push" ] || [ $GITHUB_REF = "dev" ]; then + copy_outputs_to_distribution_bucket + ETL_SUCCESS=${PIPESTATUS[0]} + fi fi # Notify slack about entire pipeline's success or failure; diff --git a/docker/local_pudl_etl.sh b/docker/local_pudl_etl.sh deleted file mode 100644 index 1056d0b715..0000000000 --- a/docker/local_pudl_etl.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/bash -# This script runs the entire ETL and validation tests in a docker container. -# It is mostly used for local debugging of our docker deployment and the gcp_pudl_etl.sh script. - -set -x - -function run_pudl_etl() { - pudl_setup \ - && alembic upgrade head \ - && ferc_to_sqlite \ - --loglevel DEBUG \ - --gcs-cache-path gs://internal-zenodo-cache.catalyst.coop \ - $PUDL_SETTINGS_YML \ - && pudl_etl \ - --loglevel DEBUG \ - --gcs-cache-path gs://internal-zenodo-cache.catalyst.coop \ - $PUDL_SETTINGS_YML \ - && pytest \ - --gcs-cache-path=gs://internal-zenodo-cache.catalyst.coop \ - --etl-settings=$PUDL_SETTINGS_YML \ - --live-dbs test -} - -# Run the ETL and save the logs. -# 2>&1 redirects stderr to stdout. -run_pudl_etl 2>&1 | tee $LOGFILE - -# Notify the ETL completion status. -if [[ ${PIPESTATUS[0]} == 0 ]]; then - echo "The ETL and tests succesfully ran!" 
-else
-    echo "Oh bummer the ETL and tests failed :/"
-fi
diff --git a/docs/conf.py b/docs/conf.py
index 78edc44dd7..6721aec6c8 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -10,6 +10,7 @@
 import datetime
 import importlib.metadata
+import os
 import shutil
 from pathlib import Path

@@ -18,6 +19,11 @@
 from pudl.metadata.resources import RESOURCE_METADATA

 DOCS_DIR = Path(__file__).parent.resolve()
+if os.environ.get("READTHEDOCS"):
+    pudl_input = Path(os.environ["PUDL_INPUT"])
+    pudl_input.mkdir(parents=True, exist_ok=True)
+    pudl_output = Path(os.environ["PUDL_OUTPUT"])
+    pudl_output.mkdir(parents=True, exist_ok=True)

 # -- Path setup --------------------------------------------------------------
 # We are building and installing the pudl package in order to get access to
diff --git a/docs/data_access.rst b/docs/data_access.rst
index ab79f0ab51..49f49e55d1 100644
--- a/docs/data_access.rst
+++ b/docs/data_access.rst
@@ -61,10 +61,13 @@
 creates a REST API allowing the data in the database to be queried programmatically.
 All the query parameters are stored in the URL so you can also share links to the data
 you've selected.

-Note that only data that has been fully integrated into the SQLite databases are
-available here. Currently this includes `the core PUDL database
-`__ and our concatenation of `all historical FERC
-Form 1 databases `__.
+.. note::
+
+   The only SQLite database containing cleaned and integrated data is `the core PUDL
+   database `__. There are also several FERC SQLite databases derived from FERC's old
+   Visual FoxPro and new XBRL data formats, which we publish to improve accessibility
+   of the raw inputs, but they should generally not be used directly if the data you
+   need has been integrated into the PUDL database.
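+
+Because everything is exposed through Datasette's JSON API, you can also query the
+database from Python without downloading it. A minimal sketch, assuming the public
+PUDL Datasette deployment at ``data.catalyst.coop`` and an illustrative table name:
+
+.. code-block:: python
+
+   import requests
+
+   # Ask Datasette to run a small read-only SQL query and return JSON rows.
+   # The URL and table name below are assumptions for illustration.
+   resp = requests.get(
+       "https://data.catalyst.coop/pudl.json",
+       params={"sql": "SELECT COUNT(*) AS n FROM plants_entity_eia", "_shape": "array"},
+       timeout=60,
+   )
+   print(resp.json())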
They can also be downloaded directly over HTTPS using the following links: -* `PUDL SQLite DB `__ +* `PUDL SQLite DB `__ * `EPA CEMS Hourly Emissions Parquet (1995-2022) `__ -* `Census DP1 SQLite DB (2010) `__ +* `Census DP1 SQLite DB (2010) `__ * Raw FERC Form 1: - * `FERC-1 SQLite derived from DBF (1994-2020) `__ - * `FERC-1 SQLite derived from XBRL (2021-2022) `__ + * `FERC-1 SQLite derived from DBF (1994-2020) `__ + * `FERC-1 SQLite derived from XBRL (2021-2022) `__ * `FERC-1 Datapackage (JSON) describing SQLite derived from XBRL `__ * `FERC-1 XBRL Taxonomy Metadata as JSON (2021-2022) `__ * Raw FERC Form 2: - * `FERC-2 SQLite derived from DBF (1996-2020) `__ - * `FERC-2 SQLite derived from XBRL (2021-2022) `__ + * `FERC-2 SQLite derived from DBF (1996-2020) `__ + * `FERC-2 SQLite derived from XBRL (2021-2022) `__ * `FERC-2 Datapackage (JSON) describing SQLite derived from XBRL `__ * `FERC-2 XBRL Taxonomy Metadata as JSON (2021-2022) `__ * Raw FERC Form 6: - * `FERC-6 SQLite derived from DBF (2000-2020) `__ - * `FERC-6 SQLite derived from XBRL (2021-2022) `__ + * `FERC-6 SQLite derived from DBF (2000-2020) `__ + * `FERC-6 SQLite derived from XBRL (2021-2022) `__ * `FERC-6 Datapackage (JSON) describing SQLite derived from XBRL `__ * `FERC-6 XBRL Taxonomy Metadata as JSON (2021-2022) `__ * Raw FERC Form 60: - * `FERC-60 SQLite derived from DBF (2006-2020) `__ - * `FERC-60 SQLite derived from XBRL (2021-2022) `__ + * `FERC-60 SQLite derived from DBF (2006-2020) `__ + * `FERC-60 SQLite derived from XBRL (2021-2022) `__ * `FERC-60 Datapackage (JSON) describing SQLite derived from XBRL `__ * `FERC-60 XBRL Taxonomy Metadata as JSON (2021) `__ * Raw FERC Form 714: - * `FERC-714 SQLite derived from XBRL (2021-2022) `__ + * `FERC-714 SQLite derived from XBRL (2021-2022) `__ * `FERC-714 Datapackage (JSON) describing SQLite derived from XBRL `__ * `FERC-714 XBRL Taxonomy Metadata as JSON (2021-2022) `__ +.. note:: + + To reduce network transfer times, we ``gzip`` the SQLite database files, which can + be quite large when uncompressed. To decompress them locally, you can use the + ``gunzip`` command. + + + .. code-block:: console + + $ gunzip *.sqlite.gz + .. _access-zenodo: diff --git a/docs/dev/clone_ferc1.rst b/docs/dev/clone_ferc1.rst index 9041daeee0..c52339fee9 100644 --- a/docs/dev/clone_ferc1.rst +++ b/docs/dev/clone_ferc1.rst @@ -1,23 +1,21 @@ =============================================================================== -Cloning the FERC Form 1 DB +Converting raw FERC data to SQLite =============================================================================== -FERC Form 1 is special. - -The :doc:`../data_sources/ferc1` is published in a particularly inaccessible format. -From 1994-2020 it used the proprietary `FoxPro database +FERC publishes its data (e.g. :doc:`../data_sources/ferc1`) in particularly difficult +to use formats. From 1994-2020 it used the proprietary `FoxPro database `__ binary format. Then in 2021 it switched to `XBRL `__, a dialect of XML used for financial reporting. -In addition to using two difficult to parse and very different file formats, the data -itself is unclean and poorly organized. As a result, very few people are currently able -to use it. This means that, while we have not yet integrated the vast majority of the -available data into PUDL, it's useful to just provide programmatic access to the bulk -raw data, independent of the cleaner subset of the data included within PUDL. 
+
 .. _access-zenodo:
diff --git a/docs/dev/clone_ferc1.rst b/docs/dev/clone_ferc1.rst
index 9041daeee0..c52339fee9 100644
--- a/docs/dev/clone_ferc1.rst
+++ b/docs/dev/clone_ferc1.rst
@@ -1,23 +1,21 @@
 ===============================================================================
-Cloning the FERC Form 1 DB
+Converting raw FERC data to SQLite
 ===============================================================================

-FERC Form 1 is special.
-
-The :doc:`../data_sources/ferc1` is published in a particularly inaccessible format.
-From 1994-2020 it used the proprietary `FoxPro database
+FERC publishes its data (e.g. :doc:`../data_sources/ferc1`) in particularly difficult
+to use formats. From 1994-2020 it used the proprietary `FoxPro database
 `__ binary format. Then in 2021 it switched to `XBRL
 `__, a dialect of XML used for financial reporting.

-In addition to using two difficult to parse and very different file formats, the data
-itself is unclean and poorly organized. As a result, very few people are currently able
-to use it. This means that, while we have not yet integrated the vast majority of the
-available data into PUDL, it's useful to just provide programmatic access to the bulk
-raw data, independent of the cleaner subset of the data included within PUDL.
+In addition to using two different, difficult-to-parse file formats, the data itself is
+unclean and poorly organized. As a result, very few people are currently able to use it.
+This means that, while we have not yet integrated the vast majority of the available
+data into PUDL, it's useful to just provide programmatic access to the bulk raw data,
+independent of the cleaner subset of the data included within PUDL.

-To provide that access, we've broken the :mod:`pudl.extract.ferc1` process
-down into several distinct steps:
+To provide that access, we've broken the :mod:`pudl.extract.ferc1` process down into
+several distinct steps:

 #. Clone the 1994-2020 annual database from FoxPro (DBF) into a local
    file-based :mod:`sqlite3` database.
@@ -30,26 +28,25 @@ down into several distinct steps:
 The FoxPro / XBRL derived FERC Form 1 databases include 100+ tables, containing
 3000+ columns.

-If you want direct access to the original FERC Form 1 database, you can just do the
-database cloning and connect directly to the resulting database. This has become
-especially useful since Microsoft recently discontinued the database driver that until
-late 2018 had allowed users to load the FoxPro database files into Microsoft Access.
+If you need to work with this relatively unprocessed data, we highly recommend
+downloading it from one of our periodic data releases or our
+:ref:`access-nightly-builds`.

-Cloning the original FERC database is the first step in the PUDL
-ETL process. This can be done using the dagster UI (see :ref:`run-dagster-ui`)
-or with the ``ferc_to_sqlite`` script (see :ref:`run-cli`).
+Cloning the original FERC database is the first step in the PUDL ETL process. This can
+be done using the Dagster UI (see :ref:`run-dagster-ui`) or with the ``ferc_to_sqlite``
+script (see :ref:`run-cli`).

 .. note::

    We recommend using the Dagster UI to execute the ETL as it provides additional
    functionality for re-execution and viewing dependencies.

-Executing a ``ferc_to_sqlite`` job will create several outputs that you can
-find in your workspace at:
+Executing a ``ferc_to_sqlite`` job will create several outputs in your ``$PUDL_OUTPUT``
+directory. For example, the FERC Form 1 outputs will include:

-  * ``pudl_output/ferc1.sqlite``: Data from 1994-2020 (FoxPro/DBF)
-  * ``pudl_output/ferc1_xbrl.sqlite``: Data from 2021 onward (XBRL)
-  * ``pudl_output/ferc1_xbrl_datapackage.json``: `Frictionless data package
+  * ``$PUDL_OUTPUT/ferc1_dbf.sqlite``: Data from 1994-2020 (FoxPro/DBF)
+  * ``$PUDL_OUTPUT/ferc1_xbrl.sqlite``: Data from 2021 onward (XBRL)
+  * ``$PUDL_OUTPUT/ferc1_xbrl_datapackage.json``: `Frictionless data package
     `__ descriptor for the XBRL derived database.
   * ``pudl_output/ferc1_xbrl_taxonomy_metadata.json``: A JSON version of the
@@ -58,21 +55,3 @@ find in your workspace at:

 By default, the script pulls in all available years and tables of data. The output is
 roughly 1GB on disk. The ``ferc_to_sqlite`` jobs also extract the XBRL data for FERC
 Form 1, 2, 6, 60 and 714.
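+
+Once those outputs exist, they are ordinary SQLite files that you can inspect with
+nothing but the standard library. A minimal sketch, assuming the output paths above:
+
+.. code-block:: python
+
+   import sqlite3
+
+   # List the 100+ tables in the cloned FERC Form 1 DBF database.
+   conn = sqlite3.connect("ferc1_dbf.sqlite")
+   tables = [
+       row[0]
+       for row in conn.execute(
+           "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;"
+       )
+   ]
+   print(f"{len(tables)} tables, e.g.: {tables[:5]}")
+   conn.close()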
-.. note::
-
-    This script pulls *all* of the FERC Form 1 DBF data into a *single* database, but
-    FERC distributes a *separate* database for each year. Virtually all the database
-    tables contain a ``report_year`` column that indicates which year they came from,
-    preventing collisions between records in the merged multi-year database. One notable
-    exception is the ``f1_respondent_id`` table, which maps ``respondent_id`` to the
-    names of the respondents. For that table, we have allowed the most recently reported
-    record to take precedence, overwriting previous mappings if they exist.
-
-.. note::
-
-    There are a handful of ``respondent_id`` values that appear in the FERC Form 1
-    database tables but do not show up in ``f1_respondent_id``. This renders the foreign
-    key relationships between those tables invalid. During the database cloning process
-    we add these ``respondent_id`` values to the ``f1_respondent_id`` table with a
-    ``respondent_name`` indicating that the ID was filled in by PUDL.
diff --git a/docs/dev/dev_setup.rst b/docs/dev/dev_setup.rst
index 36618f3a84..e94ad413da 100644
--- a/docs/dev/dev_setup.rst
+++ b/docs/dev/dev_setup.rst
@@ -65,6 +65,15 @@ command. You only need to run it once, from within the cloned repo:

    $ git config --local merge.ours.driver true

+.. note::
+
+   If there have been changes to the environment on a branch (e.g. ``dev``) that you
+   merge into your own branch, the lockfiles will need to be regenerated. This can be
+   done automatically by pushing the merged changes to your branch on GitHub, waiting a
+   couple of minutes for the ``update-conda-lockfile`` GitHub Action to run, and then
+   pulling the fresh lockfiles to your local development environment. You can also
+   regenerate the lockfiles locally (see below).
+
 -------------------------------------------------------------------------------
 Create the PUDL Dev Environment
 -------------------------------------------------------------------------------
diff --git a/docs/dev/run_the_etl.rst b/docs/dev/run_the_etl.rst
index 1b4fc13f3c..7c1a6a6094 100644
--- a/docs/dev/run_the_etl.rst
+++ b/docs/dev/run_the_etl.rst
@@ -493,23 +493,21 @@ years.
 Additional Notes
 ----------------

-The commands above should result in a bunch of Python :mod:`logging` output
-describing what the script is doing, and file outputs in the ``output``
-directory within your workspace. When the ETL is complete, you
-should see new files at ``output/ferc1.sqlite`` and ``output/pudl.sqlite`` as
-well as a new directory at ``output/hourly_emissions_epacems`` containing
-nested directories named by year and state.
-
-If you need to re-run ``ferc_to_sqlite`` and want to overwrite
-their previous outputs you can add ``--clobber`` (run ``ferc_to_sqlite --clobber``).
-All of the PUDL scripts also have help messages if you want additional information
-(run ``script_name --help``).
+The commands above should result in a bunch of Python :mod:`logging` output describing
+what the script is doing, and file outputs in your ``$PUDL_OUTPUT`` directory. When the
+ETL is complete, you should see new files at e.g. ``$PUDL_OUTPUT/ferc1_dbf.sqlite`` and
+``$PUDL_OUTPUT/pudl.sqlite`` as well as a new directory at
+``$PUDL_OUTPUT/hourly_emissions_epacems`` containing nested directories named by year
+and state.
+
+If you need to re-run ``ferc_to_sqlite`` and want to overwrite its previous outputs
+you can add ``--clobber`` (run ``ferc_to_sqlite --clobber``). All of the PUDL scripts
+also have help messages if you want additional information (run ``script_name --help``).

 .. note::

-    The ``pudl_etl`` command does not have a ``--clobber`` option because
-    each etl run uses the same database file to read and write tables.
-    This enables re-running portions of the ETL.
+    The ``pudl_etl`` command does not have a ``--clobber`` option because each ETL run
+    uses the same database file to read and write tables. This enables re-running
+    portions of the ETL.
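+
+Once the run finishes you can sanity-check the result directly. A minimal sketch,
+assuming ``$PUDL_OUTPUT`` is set as described above:
+
+.. code-block:: python
+
+   import os
+   import sqlite3
+   from pathlib import Path
+
+   # Count the tables in the freshly built PUDL database.
+   db_path = Path(os.environ["PUDL_OUTPUT"]) / "pudl.sqlite"
+   conn = sqlite3.connect(db_path)
+   (n_tables,) = conn.execute(
+       "SELECT COUNT(*) FROM sqlite_master WHERE type='table';"
+   ).fetchone()
+   print(f"{db_path} contains {n_tables} tables")
+   conn.close()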
 Foreign Keys
 ------------
diff --git a/docs/intro.rst b/docs/intro.rst
index c5ab4c6f73..058b38ad1e 100644
--- a/docs/intro.rst
+++ b/docs/intro.rst
@@ -28,12 +28,15 @@ pages for each source:
 We also publish SQLite databases containing relatively pristine versions of our more
 difficult to parse inputs, especially the old Visual FoxPro (DBF, pre-2021) and new
 XBRL data (2021+) published by FERC:

-* `FERC Form 1 (DBF) `__
-* `FERC Form 1 (XBRL) `__
-* `FERC Form 2 (XBRL) `__
-* `FERC Form 6 (XBRL) `__
-* `FERC Form 60 (XBRL) `__
-* `FERC Form 714 (XBRL) `__
+* `FERC Form 1 (DBF) `__
+* `FERC Form 1 (XBRL) `__
+* `FERC Form 2 (DBF) `__
+* `FERC Form 2 (XBRL) `__
+* `FERC Form 6 (DBF) `__
+* `FERC Form 6 (XBRL) `__
+* `FERC Form 60 (DBF) `__
+* `FERC Form 60 (XBRL) `__
+* `FERC Form 714 (XBRL) `__

 To get started using PUDL data, visit our :doc:`data_access` page, or continue reading
 to learn more about the PUDL data processing pipeline.
diff --git a/docs/release_notes.rst b/docs/release_notes.rst
index 2f3c936634..388343baca 100644
--- a/docs/release_notes.rst
+++ b/docs/release_notes.rst
@@ -72,8 +72,8 @@ Data Coverage

 * Updated :doc:`data_sources/eia860` to include final release data from 2022.
 * Updated :doc:`data_sources/eia861` to include final release data from 2022.
-* Updated :doc:`data_sources/eia923` to include early release data from 2022 and
-  monthly YTD data as of April 2023.
+* Updated :doc:`data_sources/eia923` to include final release data from 2022 and
+  monthly YTD data as of October 2023.
 * Updated :doc:`data_sources/epacems` to switch from the old FTP server to the new
   CAMPD API, and to include 2022 data. Due to changes in the ETL, Alaska, Puerto Rico
   and Hawaii are now included in CEMS processing. See issue :issue:`1264` & PRs
@@ -331,6 +331,10 @@ Miscellaneous
 * Switched to using ``conda-lock`` and ``Makefile`` to manage testing and python
   environment. Moved away from packaging PUDL for distribution via PyPI and
   ``conda-forge`` and toward treating it as an application. See :pr:`2968`
+* The two-point-ohening: We now require Pandas v2 (see :pr:`2320`), SQLAlchemy v2 (see
+  :pr:`2267`) and Pydantic v2 (see :pr:`3051`).
+* Update the names of our FERC SQLite DBs to indicate what source data they come from.
+  See issue :issue:`3079` and :pr:`3094`.

 .. _release-v2022.11.30:
diff --git a/docs/templates/eia923_child.rst.jinja b/docs/templates/eia923_child.rst.jinja
index af6bca536a..7f04f7b12f 100644
--- a/docs/templates/eia923_child.rst.jinja
+++ b/docs/templates/eia923_child.rst.jinja
@@ -35,7 +35,8 @@ in `EIA Form 423
 replaced the earlier FERC Form 423).
 If you're interested in this earlier data, get in touch with us!

-Monthly interim EIA-923 data releases are not yet integrated into PUDL. In addition, We
+Monthly interim EIA-923 data are periodically integrated into PUDL as well. Incomplete
+year-to-date data are excluded from the annualized tables to avoid confusion. We
 have not yet integrated tables reporting fuel stocks, data from Puerto Rico, or EIA-923
 schedules 6, 7, and 8.
 {% endblock %}
diff --git a/environments/conda-linux-64.lock.yml b/environments/conda-linux-64.lock.yml
index f0f30381d5..3d6da6f452 100644
--- a/environments/conda-linux-64.lock.yml
+++ b/environments/conda-linux-64.lock.yml
@@ -1,6 +1,6 @@
 # Generated by conda-lock.
# platform: linux-64 -# input_hash: 88ab6b06102bd362f7a93aacfa6fd9a41a0824c3982e85120db6a645c2c00351 +# input_hash: 566bcbefc936d18bbf3a48ce8800e5d3209d9925447fc48984c419514bfaa6f6 channels: - conda-forge @@ -11,7 +11,7 @@ dependencies: - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 - font-ttf-inconsolata=3.000=h77eed37_0 - font-ttf-source-code-pro=2.038=h77eed37_0 - - font-ttf-ubuntu=0.83=hab24e00_0 + - font-ttf-ubuntu=0.83=h77eed37_1 - ld_impl_linux-64=2.40=h41732ed_0 - libboost-headers=1.83.0=ha770c72_0 - libstdcxx-ng=13.2.0=h7e041cc_3 @@ -26,7 +26,7 @@ dependencies: - libgcc-ng=13.2.0=h807b86a_3 - aws-c-common=0.9.8=hd590300_0 - bzip2=1.0.8=hd590300_5 - - c-ares=1.22.1=hd590300_0 + - c-ares=1.23.0=hd590300_0 - fribidi=1.0.10=h36c2ea0_0 - geos=3.12.1=h59595ed_0 - gettext=0.21.1=h27087fc_0 @@ -62,10 +62,10 @@ dependencies: - lzo=2.10=h516909a_1000 - ncurses=6.4=h59595ed_2 - nspr=4.35=h27087fc_0 - - openssl=3.2.0=hd590300_0 + - openssl=3.1.4=hd590300_0 - pixman=0.42.2=h59595ed_0 - pthread-stubs=0.4=h36c2ea0_1001 - - rdma-core=28.9=h59595ed_1 + - rdma-core=49.0=hd3aeb46_1 - snappy=1.1.10=h9fff704_0 - tzcode=2023c=h0b41bf4_0 - uriparser=0.9.7=hcb278e6_1 @@ -106,7 +106,7 @@ dependencies: - readline=8.2=h8228510_1 - s2n=1.3.56=h06160fa_0 - tk=8.6.13=noxft_h4845f30_101 - - ucx=1.15.0=h64cca9d_0 + - ucx=1.15.0=hae80064_1 - xorg-libsm=1.2.4=h7391055_0 - zeromq=4.3.5=h59595ed_0 - zlib=1.2.13=hd590300_5 @@ -125,7 +125,7 @@ dependencies: - libxslt=1.1.37=h0054252_1 - minizip=4.0.3=h0ab5242_0 - nodejs=20.9.0=hb753e55_0 - - nss=3.94=h1d7d5a4_0 + - nss=3.95=h1d7d5a4_0 - orc=1.9.0=h4b38347_4 - pandoc=3.1.3=h32600fe_0 - python=3.11.6=hab00c5b_0_cpython @@ -188,7 +188,7 @@ dependencies: - humanfriendly=10.0=pyhd8ed1ab_6 - hupper=1.12=pyhd8ed1ab_0 - hyperframe=6.0.1=pyhd8ed1ab_0 - - idna=3.5=pyhd8ed1ab_0 + - idna=3.6=pyhd8ed1ab_0 - ijson=3.2.3=pyhd8ed1ab_0 - imagesize=1.4.1=pyhd8ed1ab_0 - iniconfig=2.0.0=pyhd8ed1ab_0 @@ -238,7 +238,7 @@ dependencies: - psutil=5.9.5=py311h459d7ec_1 - ptyprocess=0.7.0=pyhd3deb0d_0 - pure_eval=0.2.2=pyhd8ed1ab_0 - - pyasn1=0.5.0=pyhd8ed1ab_0 + - pyasn1=0.5.1=pyhd8ed1ab_0 - pycparser=2.21=pyhd8ed1ab_0 - pygments=2.17.2=pyhd8ed1ab_0 - pyjwt=2.8.0=pyhd8ed1ab_0 @@ -259,7 +259,7 @@ dependencies: - regex=2023.10.3=py311h459d7ec_0 - rfc3986=2.0.0=pyhd8ed1ab_0 - rfc3986-validator=0.1.1=pyh9f0ad1d_0 - - rpds-py=0.13.1=py311h46250e7_0 + - rpds-py=0.13.2=py311h46250e7_0 - rtree=1.1.0=py311h3bb2b0f_0 - ruamel.yaml.clib=0.2.7=py311h459d7ec_2 - ruff=0.1.6=py311h7145743_0 @@ -284,12 +284,12 @@ dependencies: - toolz=0.12.0=pyhd8ed1ab_0 - toposort=1.10=pyhd8ed1ab_0 - tornado=6.3.3=py311h459d7ec_1 - - traitlets=5.13.0=pyhd8ed1ab_0 + - traitlets=5.14.0=pyhd8ed1ab_0 - types-python-dateutil=2.8.19.14=pyhd8ed1ab_0 - types-pyyaml=6.0.12.12=pyhd8ed1ab_0 - typing_extensions=4.8.0=pyha770c72_0 - typing_utils=0.1.0=pyhd8ed1ab_0 - - unicodecsv=0.14.1=py_1 + - unicodecsv=0.14.1=pyhd8ed1ab_2 - uri-template=1.3.0=pyhd8ed1ab_0 - uvloop=0.19.0=py311h460e60f_0 - validators=0.22.0=pyhd8ed1ab_0 @@ -298,7 +298,7 @@ dependencies: - webencodings=0.5.1=pyhd8ed1ab_2 - websocket-client=1.6.4=pyhd8ed1ab_0 - websockets=10.4=py311hd4cff14_1 - - wheel=0.41.3=pyhd8ed1ab_0 + - wheel=0.42.0=pyhd8ed1ab_0 - widgetsnbextension=4.0.9=pyhd8ed1ab_0 - wrapt=1.16.0=py311h459d7ec_0 - xlrd=2.0.1=pyhd8ed1ab_3 @@ -330,6 +330,7 @@ dependencies: - coloredlogs=14.0=pyhd8ed1ab_3 - comm=0.1.4=pyhd8ed1ab_0 - coverage=7.3.2=py311h459d7ec_0 + - curl=8.4.0=hca28451_0 - fonttools=4.45.1=py311h459d7ec_0 - gitdb=4.0.11=pyhd8ed1ab_0 - 
graphql-core=3.2.3=pyhd8ed1ab_0 @@ -338,7 +339,7 @@ dependencies: - h2=4.1.0=pyhd8ed1ab_0 - hdf5=1.14.2=nompi_h4f84152_100 - html5lib=1.1=pyh9f0ad1d_0 - - hypothesis=6.90.0=pyha770c72_0 + - hypothesis=6.91.0=pyha770c72_0 - importlib-metadata=6.8.0=pyha770c72_0 - importlib_resources=6.1.1=pyhd8ed1ab_0 - isodate=0.6.1=pyhd8ed1ab_0 @@ -378,7 +379,7 @@ dependencies: - python-slugify=8.0.1=pyhd8ed1ab_2 - pyu2f=0.1.5=pyhd8ed1ab_0 - qtpy=2.4.1=pyhd8ed1ab_0 - - referencing=0.31.0=pyhd8ed1ab_0 + - referencing=0.31.1=pyhd8ed1ab_0 - restructuredtext_lint=1.4.0=pyhd8ed1ab_0 - rfc3339-validator=0.1.4=pyhd8ed1ab_0 - rsa=4.9=pyhd8ed1ab_0 @@ -392,14 +393,15 @@ dependencies: - urllib3=1.26.18=pyhd8ed1ab_0 - watchdog=3.0.0=py311h38be061_1 - xerces-c=3.2.4=hac6953d_3 - - yarl=1.9.2=py311h459d7ec_1 + - yarl=1.9.3=py311h459d7ec_0 - addfips=0.4.0=pyhd8ed1ab_1 - aniso8601=9.0.1=pyhd8ed1ab_0 + - annotated-types=0.6.0=pyhd8ed1ab_0 - argon2-cffi-bindings=21.2.0=py311h459d7ec_4 - arrow=1.3.0=pyhd8ed1ab_0 - async-timeout=4.0.3=pyhd8ed1ab_0 - aws-c-s3=0.4.1=hfadff92_0 - - botocore=1.32.6=pyhd8ed1ab_0 + - botocore=1.33.5=pyhd8ed1ab_0 - branca=0.7.0=pyhd8ed1ab_1 - croniter=2.0.1=pyhd8ed1ab_0 - cryptography=41.0.5=py311h63ff55d_0 @@ -414,7 +416,7 @@ dependencies: - harfbuzz=8.3.0=h3d44ed6_0 - httpcore=1.0.2=pyhd8ed1ab_0 - importlib_metadata=6.8.0=hd8ed1ab_0 - - jsonschema-specifications=2023.11.1=pyhd8ed1ab_0 + - jsonschema-specifications=2023.11.2=pyhd8ed1ab_0 - jupyter_core=5.5.0=py311h38be061_0 - jupyter_server_terminals=0.4.4=pyhd8ed1ab_1 - kealib=1.5.2=hcd42e92_1 @@ -428,7 +430,7 @@ dependencies: - prompt_toolkit=3.0.41=hd8ed1ab_0 - psycopg2-binary=2.9.7=pyhd8ed1ab_1 - pybtex=0.24.0=pyhd8ed1ab_2 - - pydantic=1.10.13=py311h459d7ec_1 + - pydantic-core=2.14.5=py311h46250e7_0 - pyproj=3.6.1=py311h1facc83_4 - pytest-console-scripts=1.4.1=pyhd8ed1ab_0 - pytest-cov=4.1.0=pyhd8ed1ab_0 @@ -442,12 +444,12 @@ dependencies: - starlette=0.32.0.post1=pyhd8ed1ab_0 - tiledb=2.16.3=h8c794c1_3 - ukkonen=1.0.1=py311h9547e67_4 - - uvicorn=0.24.0=py311h38be061_0 + - uvicorn=0.24.0.post1=py311h38be061_0 - virtualenv=20.24.7=pyhd8ed1ab_0 - - watchfiles=0.20.0=py311h46250e7_2 + - watchfiles=0.21.0=py311h46250e7_0 - aiohttp=3.8.6=py311h459d7ec_1 - alembic=1.12.1=pyhd8ed1ab_0 - - arelle-release=2.17.4=pyhd8ed1ab_0 + - arelle-release=2.17.7=pyhd8ed1ab_0 - argon2-cffi=23.1.0=pyhd8ed1ab_0 - aws-crt-cpp=0.24.7=h97e63c7_6 - bottleneck=1.3.7=py311h1f0f07a_1 @@ -461,9 +463,9 @@ dependencies: - graphene=3.3=pyhd8ed1ab_0 - grpcio-status=1.59.2=pyhd8ed1ab_0 - h3-py=3.7.6=py311hb755f60_1 - - httpx=0.25.1=pyhd8ed1ab_0 + - httpx=0.25.2=pyhd8ed1ab_0 - identify=2.5.32=pyhd8ed1ab_0 - - ipython=8.18.0=pyh0d859eb_0 + - ipython=8.18.1=pyh31011fe_1 - isoduration=20.11.0=pyhd8ed1ab_0 - jsonschema=4.20.0=pyhd8ed1ab_0 - jupyter_client=8.6.0=pyhd8ed1ab_0 @@ -474,20 +476,21 @@ dependencies: - pandas=2.1.3=py311h320fe9a_0 - pango=1.50.14=ha41ecd1_2 - pybtex-docutils=1.0.3=py311h38be061_1 + - pydantic=2.5.2=pyhd8ed1ab_0 - pyopenssl=23.3.0=pyhd8ed1ab_0 - readthedocs-sphinx-ext=2.2.3=pyhd8ed1ab_0 - requests-toolbelt=0.10.1=pyhd8ed1ab_0 - responses=0.24.1=pyhd8ed1ab_0 - - s3transfer=0.7.0=pyhd8ed1ab_0 + - s3transfer=0.8.2=pyhd8ed1ab_0 - scipy=1.11.4=py311h64a7726_0 - secretstorage=3.3.3=py311h38be061_2 - shapely=2.0.2=py311h2032efe_1 - stevedore=5.1.0=pyhd8ed1ab_0 - typeguard=4.1.5=pyhd8ed1ab_1 - typer=0.9.0=pyhd8ed1ab_0 - - uvicorn-standard=0.24.0=h38be061_0 + - uvicorn-standard=0.24.0.post1=h38be061_0 - aws-sdk-cpp=1.11.182=h8beafcf_7 - - boto3=1.29.6=pyhd8ed1ab_0 + - 
boto3=1.33.5=pyhd8ed1ab_0 - cachecontrol-with-filecache=0.13.1=pyhd8ed1ab_0 - dagster=1.5.9=pyhd8ed1ab_0 - datasette=0.64.4=pyhd8ed1ab_1 @@ -496,7 +499,7 @@ dependencies: - frictionless=4.40.8=pyh6c4a22f_0 - gdal=3.8.0=py311h815a124_6 - geopandas-base=0.14.1=pyha770c72_0 - - google-auth=2.23.4=pyhca7485f_0 + - google-auth=2.24.0=pyhca7485f_0 - gql-with-requests=3.4.1=pyhd8ed1ab_0 - gtk2=2.24.33=h90689f9_2 - ipykernel=6.26.0=pyhf8b6a83_0 @@ -508,13 +511,14 @@ dependencies: - nbformat=5.9.2=pyhd8ed1ab_0 - pandera-core=0.17.2=pyhd8ed1ab_1 - pre-commit=3.5.0=pyha770c72_0 + - pydantic-settings=2.1.0=pyhd8ed1ab_1 - requests-oauthlib=1.3.1=pyhd8ed1ab_0 - scikit-learn=1.3.2=py311hc009520_1 - timezonefinder=6.2.0=py311h459d7ec_2 - - catalystcoop.ferc_xbrl_extractor=1.2.1=pyhd8ed1ab_0 + - catalystcoop.ferc_xbrl_extractor=1.3.1=pyhd8ed1ab_0 - conda-lock=2.5.1=pyhd8ed1ab_0 - dagster-graphql=1.5.9=pyhd8ed1ab_0 - - dagster-postgres=0.21.9=pyhd8ed1ab_0 + - dagster-postgres=0.21.9=pyhd8ed1ab_1 - fiona=1.9.5=py311hf8e0aa6_1 - google-api-core=2.14.0=pyhd8ed1ab_0 - google-auth-oauthlib=1.1.0=pyhd8ed1ab_0 @@ -539,12 +543,12 @@ dependencies: - tableschema=1.19.3=pyh9f0ad1d_0 - datapackage=1.15.2=pyh44b312d_0 - google-cloud-storage=2.13.0=pyhca7485f_0 - - jupyter_server=2.10.1=pyhd8ed1ab_0 + - jupyter_server=2.11.1=pyhd8ed1ab_0 - libarrow-dataset=14.0.1=h59595ed_3_cpu - libarrow-flight-sql=14.0.1=h61ff412_3_cpu - nbconvert-pandoc=7.11.0=pyhd8ed1ab_0 - gcsfs=2023.10.0=pyhd8ed1ab_0 - - jupyter-lsp=2.2.0=pyhd8ed1ab_0 + - jupyter-lsp=2.2.1=pyhd8ed1ab_0 - jupyter-resource-usage=1.0.1=pyhd8ed1ab_0 - jupyterlab_server=2.25.2=pyhd8ed1ab_0 - libarrow-substrait=14.0.1=h61ff412_3_cpu diff --git a/environments/conda-lock.yml b/environments/conda-lock.yml index 44de324243..148e867498 100644 --- a/environments/conda-lock.yml +++ b/environments/conda-lock.yml @@ -15,9 +15,9 @@ version: 1 metadata: content_hash: - linux-64: 88ab6b06102bd362f7a93aacfa6fd9a41a0824c3982e85120db6a645c2c00351 - osx-64: 407bb59ef7f138fbfd77c40e38a21952ba1e13e25f2a6a4e43f5bb21abb3fcdf - osx-arm64: 3f9ca7f82365963841501c752f82c8cd7577658349f7f07a6c4b77f764b97b41 + linux-64: 566bcbefc936d18bbf3a48ce8800e5d3209d9925447fc48984c419514bfaa6f6 + osx-64: 39e56673d0def5503b315f36cec2b3e4bfd804758ec3bcea79dad417c6b146a2 + osx-arm64: c275fe8ff3012ad83a98252ba570f9b278f142720b73f42abfd4c4a1107f67e2 channels: - url: conda-forge used_env_vars: [] @@ -355,6 +355,45 @@ package: sha256: 201c040b6ee0045805a777f75f37a8648eb8dfd4725d62a4fcddc24d7d6c2a9f category: dev optional: true + - name: annotated-types + version: 0.6.0 + manager: conda + platform: linux-64 + dependencies: + python: ">=3.7" + typing-extensions: ">=4.0.0" + url: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.6.0-pyhd8ed1ab_0.conda + hash: + md5: 997c29372bdbe2afee073dff71f35923 + sha256: 3a2c98154d95cfd54daba6b7d507d31f5ba07ac2ad955c44eb041b66563193cd + category: main + optional: false + - name: annotated-types + version: 0.6.0 + manager: conda + platform: osx-64 + dependencies: + python: ">=3.7" + typing-extensions: ">=4.0.0" + url: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.6.0-pyhd8ed1ab_0.conda + hash: + md5: 997c29372bdbe2afee073dff71f35923 + sha256: 3a2c98154d95cfd54daba6b7d507d31f5ba07ac2ad955c44eb041b66563193cd + category: main + optional: false + - name: annotated-types + version: 0.6.0 + manager: conda + platform: osx-arm64 + dependencies: + python: ">=3.7" + typing-extensions: ">=4.0.0" + url: 
https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.6.0-pyhd8ed1ab_0.conda + hash: + md5: 997c29372bdbe2afee073dff71f35923 + sha256: 3a2c98154d95cfd54daba6b7d507d31f5ba07ac2ad955c44eb041b66563193cd + category: main + optional: false - name: anyascii version: 0.3.2 manager: conda @@ -497,7 +536,7 @@ package: category: main optional: false - name: arelle-release - version: 2.17.4 + version: 2.17.7 manager: conda platform: linux-64 dependencies: @@ -510,14 +549,14 @@ package: python: ">=3.8" python-dateutil: 2.* regex: "" - url: https://conda.anaconda.org/conda-forge/noarch/arelle-release-2.17.4-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/arelle-release-2.17.7-pyhd8ed1ab_0.conda hash: - md5: 66972cbec7556aa94aba3da76b408f19 - sha256: a30a66c040021c396a99bf862ad78181a4888e67b2ac51ac7e21422c4165986c + md5: b42bbf2e318b6bbbd9de2d81ecf8ed50 + sha256: 3094446e601ad9160677c2bb5b75b9946c81b679bebf42bf52c126e71d76fb43 category: main optional: false - name: arelle-release - version: 2.17.4 + version: 2.17.7 manager: conda platform: osx-64 dependencies: @@ -530,14 +569,14 @@ package: lxml: 4.* openpyxl: 3.* pyparsing: 3.* - url: https://conda.anaconda.org/conda-forge/noarch/arelle-release-2.17.4-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/arelle-release-2.17.7-pyhd8ed1ab_0.conda hash: - md5: 66972cbec7556aa94aba3da76b408f19 - sha256: a30a66c040021c396a99bf862ad78181a4888e67b2ac51ac7e21422c4165986c + md5: b42bbf2e318b6bbbd9de2d81ecf8ed50 + sha256: 3094446e601ad9160677c2bb5b75b9946c81b679bebf42bf52c126e71d76fb43 category: main optional: false - name: arelle-release - version: 2.17.4 + version: 2.17.7 manager: conda platform: osx-arm64 dependencies: @@ -550,10 +589,10 @@ package: lxml: 4.* openpyxl: 3.* pyparsing: 3.* - url: https://conda.anaconda.org/conda-forge/noarch/arelle-release-2.17.4-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/arelle-release-2.17.7-pyhd8ed1ab_0.conda hash: - md5: 66972cbec7556aa94aba3da76b408f19 - sha256: a30a66c040021c396a99bf862ad78181a4888e67b2ac51ac7e21422c4165986c + md5: b42bbf2e318b6bbbd9de2d81ecf8ed50 + sha256: 3094446e601ad9160677c2bb5b75b9946c81b679bebf42bf52c126e71d76fb43 category: main optional: false - name: argon2-cffi @@ -1875,52 +1914,52 @@ package: category: main optional: false - name: boto3 - version: 1.29.6 + version: 1.33.5 manager: conda platform: linux-64 dependencies: - botocore: ">=1.32.6,<1.33.0" + botocore: ">=1.33.5,<1.34.0" jmespath: ">=0.7.1,<2.0.0" python: ">=3.7" - s3transfer: ">=0.7.0,<0.8.0" - url: https://conda.anaconda.org/conda-forge/noarch/boto3-1.29.6-pyhd8ed1ab_0.conda + s3transfer: ">=0.8.2,<0.9.0" + url: https://conda.anaconda.org/conda-forge/noarch/boto3-1.33.5-pyhd8ed1ab_0.conda hash: - md5: 0cbc42e6f9557edfea7f552c644027d7 - sha256: 7e3c31d99afff810f0d68b4d7c957be34917d1d4bfc76a34620dee0bc35eec1d + md5: 7485d3ee00269cd33baa2ad64a0923ee + sha256: babecba07e296dc4cd26580427508e7734c522d1488a7f94a1f13204de3ca856 category: main optional: false - name: boto3 - version: 1.29.6 + version: 1.33.5 manager: conda platform: osx-64 dependencies: python: ">=3.7" jmespath: ">=0.7.1,<2.0.0" - s3transfer: ">=0.7.0,<0.8.0" - botocore: ">=1.32.6,<1.33.0" - url: https://conda.anaconda.org/conda-forge/noarch/boto3-1.29.6-pyhd8ed1ab_0.conda + s3transfer: ">=0.8.2,<0.9.0" + botocore: ">=1.33.5,<1.34.0" + url: https://conda.anaconda.org/conda-forge/noarch/boto3-1.33.5-pyhd8ed1ab_0.conda hash: - md5: 0cbc42e6f9557edfea7f552c644027d7 - sha256: 
7e3c31d99afff810f0d68b4d7c957be34917d1d4bfc76a34620dee0bc35eec1d + md5: 7485d3ee00269cd33baa2ad64a0923ee + sha256: babecba07e296dc4cd26580427508e7734c522d1488a7f94a1f13204de3ca856 category: main optional: false - name: boto3 - version: 1.29.6 + version: 1.33.5 manager: conda platform: osx-arm64 dependencies: python: ">=3.7" jmespath: ">=0.7.1,<2.0.0" - s3transfer: ">=0.7.0,<0.8.0" - botocore: ">=1.32.6,<1.33.0" - url: https://conda.anaconda.org/conda-forge/noarch/boto3-1.29.6-pyhd8ed1ab_0.conda + s3transfer: ">=0.8.2,<0.9.0" + botocore: ">=1.33.5,<1.34.0" + url: https://conda.anaconda.org/conda-forge/noarch/boto3-1.33.5-pyhd8ed1ab_0.conda hash: - md5: 0cbc42e6f9557edfea7f552c644027d7 - sha256: 7e3c31d99afff810f0d68b4d7c957be34917d1d4bfc76a34620dee0bc35eec1d + md5: 7485d3ee00269cd33baa2ad64a0923ee + sha256: babecba07e296dc4cd26580427508e7734c522d1488a7f94a1f13204de3ca856 category: main optional: false - name: botocore - version: 1.32.6 + version: 1.33.5 manager: conda platform: linux-64 dependencies: @@ -1928,14 +1967,14 @@ package: python: ">=3.7" python-dateutil: ">=2.1,<3.0.0" urllib3: ">=1.25.4,<1.27" - url: https://conda.anaconda.org/conda-forge/noarch/botocore-1.32.6-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/botocore-1.33.5-pyhd8ed1ab_0.conda hash: - md5: a6747e9f4cb2ca858735017cf783fe08 - sha256: 534d61c7d2c2184d59b828dc582600482ed12c08922125f07f454f5d91d85573 + md5: 352c39ba5cd9ea01996358f0748e102e + sha256: 56566ea8f3a48c24190c1dcf50681c0a84b26821c335c21b5c3c5d238e4bdb14 category: main optional: false - name: botocore - version: 1.32.6 + version: 1.33.5 manager: conda platform: osx-64 dependencies: @@ -1943,14 +1982,14 @@ package: python-dateutil: ">=2.1,<3.0.0" jmespath: ">=0.7.1,<2.0.0" urllib3: ">=1.25.4,<1.27" - url: https://conda.anaconda.org/conda-forge/noarch/botocore-1.32.6-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/botocore-1.33.5-pyhd8ed1ab_0.conda hash: - md5: a6747e9f4cb2ca858735017cf783fe08 - sha256: 534d61c7d2c2184d59b828dc582600482ed12c08922125f07f454f5d91d85573 + md5: 352c39ba5cd9ea01996358f0748e102e + sha256: 56566ea8f3a48c24190c1dcf50681c0a84b26821c335c21b5c3c5d238e4bdb14 category: main optional: false - name: botocore - version: 1.32.6 + version: 1.33.5 manager: conda platform: osx-arm64 dependencies: @@ -1958,10 +1997,10 @@ package: python-dateutil: ">=2.1,<3.0.0" jmespath: ">=0.7.1,<2.0.0" urllib3: ">=1.25.4,<1.27" - url: https://conda.anaconda.org/conda-forge/noarch/botocore-1.32.6-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/botocore-1.33.5-pyhd8ed1ab_0.conda hash: - md5: a6747e9f4cb2ca858735017cf783fe08 - sha256: 534d61c7d2c2184d59b828dc582600482ed12c08922125f07f454f5d91d85573 + md5: 352c39ba5cd9ea01996358f0748e102e + sha256: 56566ea8f3a48c24190c1dcf50681c0a84b26821c335c21b5c3c5d238e4bdb14 category: main optional: false - name: bottleneck @@ -2207,37 +2246,37 @@ package: category: main optional: false - name: c-ares - version: 1.22.1 + version: 1.23.0 manager: conda platform: linux-64 dependencies: libgcc-ng: ">=12" - url: https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.22.1-hd590300_0.conda + url: https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.23.0-hd590300_0.conda hash: - md5: 8430bd266c7b2cfbda403f7585d5ee86 - sha256: d41cf87938ba66de538b91afed3ece9b4cf5ed082a7d1c1add46b70f482f34b9 + md5: d459949bc10f64dee1595c176c2e6291 + sha256: 6b0eee827bade11c2964a05867499a50ad2a9d1b14dfe18fb867a3bc9357f56f category: main optional: false - name: c-ares - version: 
1.22.1 + version: 1.23.0 manager: conda platform: osx-64 dependencies: {} - url: https://conda.anaconda.org/conda-forge/osx-64/c-ares-1.22.1-h10d778d_0.conda + url: https://conda.anaconda.org/conda-forge/osx-64/c-ares-1.23.0-h10d778d_0.conda hash: - md5: 7040d0624b78a81c8d52f22b662d7c35 - sha256: e52123d4d1e880ad883da1fa6301fa318e87cf42b6228833177d41053f7288b4 + md5: 8da823fabbad661eefc48b779d89a4ac + sha256: d1080366254a32bd1ff23f10fcfe61bfb91e2af19e71047fc2ffddd062a59033 category: main optional: false - name: c-ares - version: 1.22.1 + version: 1.23.0 manager: conda platform: osx-arm64 dependencies: {} - url: https://conda.anaconda.org/conda-forge/osx-arm64/c-ares-1.22.1-h93a5062_0.conda + url: https://conda.anaconda.org/conda-forge/osx-arm64/c-ares-1.23.0-h93a5062_0.conda hash: - md5: f9d38cc3908c066e50b184cdcab12929 - sha256: 75f0222f76c9848ef9c3892300d057cb8285f28341d2f149d1fc10373242969c + md5: b187f2b99e52905042d661f824666964 + sha256: de5385280dcad805428068adb1f4a7eb1e6ec8987e2f25c4ff5766e3fec3b4a2 category: main optional: false - name: ca-certificates @@ -2610,7 +2649,7 @@ package: category: main optional: false - name: catalystcoop.ferc_xbrl_extractor - version: 1.2.1 + version: 1.3.1 manager: conda platform: linux-64 dependencies: @@ -2620,56 +2659,56 @@ package: lxml: ">=4.9.1,<5" numpy: ">=1.16,<2" pandas: ">=1.5,<2.2" - pydantic: ">=1.9,<3" + pydantic: ">=2,<3" python: ">=3.10,<3.13" sqlalchemy: ">=1.4,<3" stringcase: ">=1.2,<2" - url: https://conda.anaconda.org/conda-forge/noarch/catalystcoop.ferc_xbrl_extractor-1.2.1-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/catalystcoop.ferc_xbrl_extractor-1.3.1-pyhd8ed1ab_0.conda hash: - md5: 901c0be7848920eeaeb14bce747c589c - sha256: f70614208da7b61b41ead6d2260ca3b0d6c0785388b09f7aa4615b56fbf3ce37 + md5: 223cdad8b8eee98aae17835e4e34103d + sha256: d0588a3009fd8d2fbde979f3a518f7a1383f052f01a46022c541551adc413da8 category: main optional: false - name: catalystcoop.ferc_xbrl_extractor - version: 1.2.1 + version: 1.3.1 manager: conda platform: osx-64 dependencies: sqlalchemy: ">=1.4,<3" lxml: ">=4.9.1,<5" + pydantic: ">=2,<3" python: ">=3.10,<3.13" coloredlogs: ">=14.0,<15.1" frictionless: ">=4.4,<5" numpy: ">=1.16,<2" arelle-release: ">=2.3,<3" pandas: ">=1.5,<2.2" - pydantic: ">=1.9,<3" stringcase: ">=1.2,<2" - url: https://conda.anaconda.org/conda-forge/noarch/catalystcoop.ferc_xbrl_extractor-1.2.1-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/catalystcoop.ferc_xbrl_extractor-1.3.1-pyhd8ed1ab_0.conda hash: - md5: 901c0be7848920eeaeb14bce747c589c - sha256: f70614208da7b61b41ead6d2260ca3b0d6c0785388b09f7aa4615b56fbf3ce37 + md5: 223cdad8b8eee98aae17835e4e34103d + sha256: d0588a3009fd8d2fbde979f3a518f7a1383f052f01a46022c541551adc413da8 category: main optional: false - name: catalystcoop.ferc_xbrl_extractor - version: 1.2.1 + version: 1.3.1 manager: conda platform: osx-arm64 dependencies: sqlalchemy: ">=1.4,<3" lxml: ">=4.9.1,<5" + pydantic: ">=2,<3" python: ">=3.10,<3.13" coloredlogs: ">=14.0,<15.1" frictionless: ">=4.4,<5" numpy: ">=1.16,<2" arelle-release: ">=2.3,<3" pandas: ">=1.5,<2.2" - pydantic: ">=1.9,<3" stringcase: ">=1.2,<2" - url: https://conda.anaconda.org/conda-forge/noarch/catalystcoop.ferc_xbrl_extractor-1.2.1-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/catalystcoop.ferc_xbrl_extractor-1.3.1-pyhd8ed1ab_0.conda hash: - md5: 901c0be7848920eeaeb14bce747c589c - sha256: f70614208da7b61b41ead6d2260ca3b0d6c0785388b09f7aa4615b56fbf3ce37 + md5: 
223cdad8b8eee98aae17835e4e34103d + sha256: d0588a3009fd8d2fbde979f3a518f7a1383f052f01a46022c541551adc413da8 category: main optional: false - name: cchardet @@ -3664,6 +3703,58 @@ package: sha256: 00c9b389b51b6e951a1f639aa04dceca9e329e144275c79b4f6baacd3fb90345 category: main optional: false + - name: curl + version: 8.4.0 + manager: conda + platform: linux-64 + dependencies: + krb5: ">=1.21.2,<1.22.0a0" + libcurl: 8.4.0 + libgcc-ng: ">=12" + libssh2: ">=1.11.0,<2.0a0" + libzlib: ">=1.2.13,<1.3.0a0" + openssl: ">=3.1.3,<4.0a0" + zstd: ">=1.5.5,<1.6.0a0" + url: https://conda.anaconda.org/conda-forge/linux-64/curl-8.4.0-hca28451_0.conda + hash: + md5: 2bcf7689cae931dd35d9a45626f49fce + sha256: 373c50b5b668cf39a71d17a42a96144d5efc1e62e7d81c1dd830e2493cefc8cc + category: main + optional: false + - name: curl + version: 8.4.0 + manager: conda + platform: osx-64 + dependencies: + krb5: ">=1.21.2,<1.22.0a0" + libcurl: 8.4.0 + libssh2: ">=1.11.0,<2.0a0" + libzlib: ">=1.2.13,<1.3.0a0" + openssl: ">=3.1.3,<4.0a0" + zstd: ">=1.5.5,<1.6.0a0" + url: https://conda.anaconda.org/conda-forge/osx-64/curl-8.4.0-h726d00d_0.conda + hash: + md5: e1de44cac6e7774dd2c1e074f5d637a9 + sha256: 32cb23c91dd4cd88d3e6c7adb38ea3d1a1e5da79c63a20ec27d3d0924fcf644c + category: main + optional: false + - name: curl + version: 8.4.0 + manager: conda + platform: osx-arm64 + dependencies: + krb5: ">=1.21.2,<1.22.0a0" + libcurl: 8.4.0 + libssh2: ">=1.11.0,<2.0a0" + libzlib: ">=1.2.13,<1.3.0a0" + openssl: ">=3.1.3,<4.0a0" + zstd: ">=1.5.5,<1.6.0a0" + url: https://conda.anaconda.org/conda-forge/osx-arm64/curl-8.4.0-h2d989ff_0.conda + hash: + md5: ae975c2ea5334bd8a8ddecb5013a30c6 + sha256: d0fa5d1a7a6d0e9dcf930db1e4a750991244567ea5e09a15a00c163a52113465 + category: main + optional: false - name: cycler version: 0.12.1 manager: conda @@ -3918,10 +4009,10 @@ package: dagster: 1.5.9.* psycopg2-binary: "" python: ">=3.8" - url: https://conda.anaconda.org/conda-forge/noarch/dagster-postgres-0.21.9-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/dagster-postgres-0.21.9-pyhd8ed1ab_1.conda hash: - md5: 18c5dd009bd4d99ec38003583134c9fc - sha256: 83ad5a4eca4698b1258398bcd405665bbd8e41464124221cf477bb78bdc22100 + md5: 8c1a941fe77b920b1c7933a7a0c6bf2e + sha256: 0e947f376d6878bd8e505932277e84c373da492a38d2c4ef9d96fc25f5327845 category: main optional: false - name: dagster-postgres @@ -3932,10 +4023,10 @@ package: psycopg2-binary: "" python: ">=3.8" dagster: 1.5.9.* - url: https://conda.anaconda.org/conda-forge/noarch/dagster-postgres-0.21.9-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/dagster-postgres-0.21.9-pyhd8ed1ab_1.conda hash: - md5: 18c5dd009bd4d99ec38003583134c9fc - sha256: 83ad5a4eca4698b1258398bcd405665bbd8e41464124221cf477bb78bdc22100 + md5: 8c1a941fe77b920b1c7933a7a0c6bf2e + sha256: 0e947f376d6878bd8e505932277e84c373da492a38d2c4ef9d96fc25f5327845 category: main optional: false - name: dagster-postgres @@ -3946,10 +4037,10 @@ package: psycopg2-binary: "" python: ">=3.8" dagster: 1.5.9.* - url: https://conda.anaconda.org/conda-forge/noarch/dagster-postgres-0.21.9-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/dagster-postgres-0.21.9-pyhd8ed1ab_1.conda hash: - md5: 18c5dd009bd4d99ec38003583134c9fc - sha256: 83ad5a4eca4698b1258398bcd405665bbd8e41464124221cf477bb78bdc22100 + md5: 8c1a941fe77b920b1c7933a7a0c6bf2e + sha256: 0e947f376d6878bd8e505932277e84c373da492a38d2c4ef9d96fc25f5327845 category: main optional: false - name: dagster-webserver @@ -5164,10 +5255,10 @@ 
package: manager: conda platform: linux-64 dependencies: {} - url: https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-hab24e00_0.tar.bz2 + url: https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_1.conda hash: - md5: 19410c3df09dfb12d1206132a1d357c5 - sha256: 470d5db54102bd51dbb0c5990324a2f4a0bc976faa493b22193338adb9882e2e + md5: 6185f640c43843e5ad6fd1c5372c3f80 + sha256: 056c85b482d58faab5fd4670b6c1f5df0986314cca3bc831d458b22e4ef2c792 category: main optional: false - name: font-ttf-ubuntu @@ -5175,10 +5266,10 @@ package: manager: conda platform: osx-64 dependencies: {} - url: https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-hab24e00_0.tar.bz2 + url: https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_1.conda hash: - md5: 19410c3df09dfb12d1206132a1d357c5 - sha256: 470d5db54102bd51dbb0c5990324a2f4a0bc976faa493b22193338adb9882e2e + md5: 6185f640c43843e5ad6fd1c5372c3f80 + sha256: 056c85b482d58faab5fd4670b6c1f5df0986314cca3bc831d458b22e4ef2c792 category: main optional: false - name: font-ttf-ubuntu @@ -5186,10 +5277,10 @@ package: manager: conda platform: osx-arm64 dependencies: {} - url: https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-hab24e00_0.tar.bz2 + url: https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_1.conda hash: - md5: 19410c3df09dfb12d1206132a1d357c5 - sha256: 470d5db54102bd51dbb0c5990324a2f4a0bc976faa493b22193338adb9882e2e + md5: 6185f640c43843e5ad6fd1c5372c3f80 + sha256: 056c85b482d58faab5fd4670b6c1f5df0986314cca3bc831d458b22e4ef2c792 category: main optional: false - name: fontconfig @@ -5294,8 +5385,8 @@ package: dependencies: font-ttf-inconsolata: "" font-ttf-source-code-pro: "" - font-ttf-dejavu-sans-mono: "" font-ttf-ubuntu: "" + font-ttf-dejavu-sans-mono: "" url: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2 hash: md5: f766549260d6815b0c52253f1fb1bb29 @@ -5309,8 +5400,8 @@ package: dependencies: font-ttf-inconsolata: "" font-ttf-source-code-pro: "" - font-ttf-dejavu-sans-mono: "" font-ttf-ubuntu: "" + font-ttf-dejavu-sans-mono: "" url: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2 hash: md5: f766549260d6815b0c52253f1fb1bb29 @@ -6379,7 +6470,7 @@ package: category: main optional: false - name: google-auth - version: 2.23.4 + version: 2.24.0 manager: conda platform: linux-64 dependencies: @@ -6392,14 +6483,14 @@ package: pyu2f: ">=0.1.5" requests: ">=2.20.0,<3.0.0" rsa: ">=3.1.4,<5" - url: https://conda.anaconda.org/conda-forge/noarch/google-auth-2.23.4-pyhca7485f_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/google-auth-2.24.0-pyhca7485f_0.conda hash: - md5: 9ad01e23627db9def3104ba78fd19229 - sha256: 1319ebc61518025e3bd7de38d27d254d8dcc61cc3b7d9fd1f62148ae614c8657 + md5: 5c80374ea4c24d3bd6822108d43715d0 + sha256: c270d1866bd01f3fa97e5c65496b853af6b1c8d58479132c8b3397534fbb2919 category: main optional: false - name: google-auth - version: 2.23.4 + version: 2.24.0 manager: conda platform: osx-64 dependencies: @@ -6412,14 +6503,14 @@ package: cachetools: ">=2.0.0,<6.0" aiohttp: ">=3.6.2,<4.0.0" cryptography: ">=38.0.3" - url: https://conda.anaconda.org/conda-forge/noarch/google-auth-2.23.4-pyhca7485f_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/google-auth-2.24.0-pyhca7485f_0.conda hash: - md5: 9ad01e23627db9def3104ba78fd19229 - sha256: 1319ebc61518025e3bd7de38d27d254d8dcc61cc3b7d9fd1f62148ae614c8657 + md5: 5c80374ea4c24d3bd6822108d43715d0 + sha256: 
c270d1866bd01f3fa97e5c65496b853af6b1c8d58479132c8b3397534fbb2919 category: main optional: false - name: google-auth - version: 2.23.4 + version: 2.24.0 manager: conda platform: osx-arm64 dependencies: @@ -6432,10 +6523,10 @@ package: cachetools: ">=2.0.0,<6.0" aiohttp: ">=3.6.2,<4.0.0" cryptography: ">=38.0.3" - url: https://conda.anaconda.org/conda-forge/noarch/google-auth-2.23.4-pyhca7485f_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/google-auth-2.24.0-pyhca7485f_0.conda hash: - md5: 9ad01e23627db9def3104ba78fd19229 - sha256: 1319ebc61518025e3bd7de38d27d254d8dcc61cc3b7d9fd1f62148ae614c8657 + md5: 5c80374ea4c24d3bd6822108d43715d0 + sha256: c270d1866bd01f3fa97e5c65496b853af6b1c8d58479132c8b3397534fbb2919 category: main optional: false - name: google-auth-oauthlib @@ -7803,54 +7894,54 @@ package: category: dev optional: true - name: httpx - version: 0.25.1 + version: 0.25.2 manager: conda platform: linux-64 dependencies: anyio: "" certifi: "" - httpcore: "" + httpcore: 1.* idna: "" python: ">=3.8" sniffio: "" - url: https://conda.anaconda.org/conda-forge/noarch/httpx-0.25.1-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/httpx-0.25.2-pyhd8ed1ab_0.conda hash: - md5: 3e00320730cb93fa4941a0cbea0db572 - sha256: fbf9e61459b65022eecbdbb19ec2dc83740374e7db981732d687456e5bcdff72 + md5: 80e638907ed5b5c5109c44660464d1e4 + sha256: 511f591643f9e67470d9217698a78dd40f3e6178798e79060c2142807e1d4861 category: main optional: false - name: httpx - version: 0.25.1 + version: 0.25.2 manager: conda platform: osx-64 dependencies: certifi: "" idna: "" - httpcore: "" anyio: "" sniffio: "" python: ">=3.8" - url: https://conda.anaconda.org/conda-forge/noarch/httpx-0.25.1-pyhd8ed1ab_0.conda + httpcore: 1.* + url: https://conda.anaconda.org/conda-forge/noarch/httpx-0.25.2-pyhd8ed1ab_0.conda hash: - md5: 3e00320730cb93fa4941a0cbea0db572 - sha256: fbf9e61459b65022eecbdbb19ec2dc83740374e7db981732d687456e5bcdff72 + md5: 80e638907ed5b5c5109c44660464d1e4 + sha256: 511f591643f9e67470d9217698a78dd40f3e6178798e79060c2142807e1d4861 category: main optional: false - name: httpx - version: 0.25.1 + version: 0.25.2 manager: conda platform: osx-arm64 dependencies: certifi: "" idna: "" - httpcore: "" anyio: "" sniffio: "" python: ">=3.8" - url: https://conda.anaconda.org/conda-forge/noarch/httpx-0.25.1-pyhd8ed1ab_0.conda + httpcore: 1.* + url: https://conda.anaconda.org/conda-forge/noarch/httpx-0.25.2-pyhd8ed1ab_0.conda hash: - md5: 3e00320730cb93fa4941a0cbea0db572 - sha256: fbf9e61459b65022eecbdbb19ec2dc83740374e7db981732d687456e5bcdff72 + md5: 80e638907ed5b5c5109c44660464d1e4 + sha256: 511f591643f9e67470d9217698a78dd40f3e6178798e79060c2142807e1d4861 category: main optional: false - name: humanfriendly @@ -7965,7 +8056,7 @@ package: category: main optional: false - name: hypothesis - version: 6.90.0 + version: 6.91.0 manager: conda platform: linux-64 dependencies: @@ -7976,14 +8067,14 @@ package: python: ">=3.8" setuptools: "" sortedcontainers: ">=2.1.0,<3.0.0" - url: https://conda.anaconda.org/conda-forge/noarch/hypothesis-6.90.0-pyha770c72_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/hypothesis-6.91.0-pyha770c72_0.conda hash: - md5: 158cd5ffb2605febd8dfaff449079eed - sha256: 1e5b6e988349ca6e81c52b65c243d5000740bdc97f898d95aabef99fae173119 + md5: 06da22edc102acb39fcf951b4872f754 + sha256: 0dc4aab6a020d6ba5d569128155529137a48cc08d91928e1dbeb0bdcb4b27117 category: main optional: false - name: hypothesis - version: 6.90.0 + version: 6.91.0 manager: conda platform: osx-64 
dependencies: @@ -7994,14 +8085,14 @@ package: sortedcontainers: ">=2.1.0,<3.0.0" backports.zoneinfo: ">=0.2.1" exceptiongroup: ">=1.0.0rc8" - url: https://conda.anaconda.org/conda-forge/noarch/hypothesis-6.90.0-pyha770c72_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/hypothesis-6.91.0-pyha770c72_0.conda hash: - md5: 158cd5ffb2605febd8dfaff449079eed - sha256: 1e5b6e988349ca6e81c52b65c243d5000740bdc97f898d95aabef99fae173119 + md5: 06da22edc102acb39fcf951b4872f754 + sha256: 0dc4aab6a020d6ba5d569128155529137a48cc08d91928e1dbeb0bdcb4b27117 category: main optional: false - name: hypothesis - version: 6.90.0 + version: 6.91.0 manager: conda platform: osx-arm64 dependencies: @@ -8012,10 +8103,10 @@ package: sortedcontainers: ">=2.1.0,<3.0.0" backports.zoneinfo: ">=0.2.1" exceptiongroup: ">=1.0.0rc8" - url: https://conda.anaconda.org/conda-forge/noarch/hypothesis-6.90.0-pyha770c72_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/hypothesis-6.91.0-pyha770c72_0.conda hash: - md5: 158cd5ffb2605febd8dfaff449079eed - sha256: 1e5b6e988349ca6e81c52b65c243d5000740bdc97f898d95aabef99fae173119 + md5: 06da22edc102acb39fcf951b4872f754 + sha256: 0dc4aab6a020d6ba5d569128155529137a48cc08d91928e1dbeb0bdcb4b27117 category: main optional: false - name: icu @@ -8093,39 +8184,39 @@ package: category: main optional: false - name: idna - version: "3.5" + version: "3.6" manager: conda platform: linux-64 dependencies: python: ">=3.6" - url: https://conda.anaconda.org/conda-forge/noarch/idna-3.5-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/idna-3.6-pyhd8ed1ab_0.conda hash: - md5: f8ae6c63eca96e5ad3074f4a27bcfc08 - sha256: 23c4e822fef052e45250e9186d2bf9bdb2ecb1cb78e05221a19fa6455205b360 + md5: 1a76f09108576397c41c0b0c5bd84134 + sha256: 6ee4c986d69ce61e60a20b2459b6f2027baeba153f0a64995fd3cb47c2cc7e07 category: main optional: false - name: idna - version: "3.5" + version: "3.6" manager: conda platform: osx-64 dependencies: python: ">=3.6" - url: https://conda.anaconda.org/conda-forge/noarch/idna-3.5-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/idna-3.6-pyhd8ed1ab_0.conda hash: - md5: f8ae6c63eca96e5ad3074f4a27bcfc08 - sha256: 23c4e822fef052e45250e9186d2bf9bdb2ecb1cb78e05221a19fa6455205b360 + md5: 1a76f09108576397c41c0b0c5bd84134 + sha256: 6ee4c986d69ce61e60a20b2459b6f2027baeba153f0a64995fd3cb47c2cc7e07 category: main optional: false - name: idna - version: "3.5" + version: "3.6" manager: conda platform: osx-arm64 dependencies: python: ">=3.6" - url: https://conda.anaconda.org/conda-forge/noarch/idna-3.5-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/idna-3.6-pyhd8ed1ab_0.conda hash: - md5: f8ae6c63eca96e5ad3074f4a27bcfc08 - sha256: 23c4e822fef052e45250e9186d2bf9bdb2ecb1cb78e05221a19fa6455205b360 + md5: 1a76f09108576397c41c0b0c5bd84134 + sha256: 6ee4c986d69ce61e60a20b2459b6f2027baeba153f0a64995fd3cb47c2cc7e07 category: main optional: false - name: ijson @@ -8428,77 +8519,75 @@ package: category: main optional: false - name: ipython - version: 8.18.0 + version: 8.18.1 manager: conda platform: linux-64 dependencies: - __linux: "" + __unix: "" decorator: "" exceptiongroup: "" jedi: ">=0.16" matplotlib-inline: "" pexpect: ">4.3" pickleshare: "" - prompt_toolkit: ">=3.0.30,<3.1.0,!=3.0.37" + prompt-toolkit: ">=3.0.30,<3.1.0,!=3.0.37" pygments: ">=2.4.0" python: ">=3.9" stack_data: "" traitlets: ">=5" typing_extensions: "" - url: https://conda.anaconda.org/conda-forge/noarch/ipython-8.18.0-pyh0d859eb_0.conda + url: 
https://conda.anaconda.org/conda-forge/noarch/ipython-8.18.1-pyh31011fe_1.conda hash: - md5: ee60af04bb7046ffdcbf2f1d2e8b0567 - sha256: 06f190aee3d0e6a9080389c093dc3a94a02fae6a2dab5fe7e14c0bb17196cea3 + md5: ac2f9c2e10c2e90e8d135cef51f9753a + sha256: 67490e640faa372d663a5c5cd2d61f417cce22a019a4de82a9e5ddb1cf2ee181 category: main optional: false - name: ipython - version: 8.18.0 + version: 8.18.1 manager: conda platform: osx-64 dependencies: typing_extensions: "" + __unix: "" decorator: "" - __osx: "" exceptiongroup: "" stack_data: "" matplotlib-inline: "" pickleshare: "" - appnope: "" python: ">=3.9" pygments: ">=2.4.0" traitlets: ">=5" jedi: ">=0.16" pexpect: ">4.3" - prompt_toolkit: ">=3.0.30,<3.1.0,!=3.0.37" - url: https://conda.anaconda.org/conda-forge/noarch/ipython-8.18.0-pyh31c8845_0.conda + prompt-toolkit: ">=3.0.30,<3.1.0,!=3.0.37" + url: https://conda.anaconda.org/conda-forge/noarch/ipython-8.18.1-pyh31011fe_1.conda hash: - md5: 78015fdf0aea454db0b0630c40c094d6 - sha256: 50c4f9b78d4448812e1af8f678075fa5f65fe4170986b506ac87a4208e6460b1 + md5: ac2f9c2e10c2e90e8d135cef51f9753a + sha256: 67490e640faa372d663a5c5cd2d61f417cce22a019a4de82a9e5ddb1cf2ee181 category: main optional: false - name: ipython - version: 8.18.0 + version: 8.18.1 manager: conda platform: osx-arm64 dependencies: typing_extensions: "" + __unix: "" decorator: "" - __osx: "" exceptiongroup: "" stack_data: "" matplotlib-inline: "" pickleshare: "" - appnope: "" python: ">=3.9" pygments: ">=2.4.0" traitlets: ">=5" jedi: ">=0.16" pexpect: ">4.3" - prompt_toolkit: ">=3.0.30,<3.1.0,!=3.0.37" - url: https://conda.anaconda.org/conda-forge/noarch/ipython-8.18.0-pyh31c8845_0.conda + prompt-toolkit: ">=3.0.30,<3.1.0,!=3.0.37" + url: https://conda.anaconda.org/conda-forge/noarch/ipython-8.18.1-pyh31011fe_1.conda hash: - md5: 78015fdf0aea454db0b0630c40c094d6 - sha256: 50c4f9b78d4448812e1af8f678075fa5f65fe4170986b506ac87a4208e6460b1 + md5: ac2f9c2e10c2e90e8d135cef51f9753a + sha256: 67490e640faa372d663a5c5cd2d61f417cce22a019a4de82a9e5ddb1cf2ee181 category: main optional: false - name: ipywidgets @@ -9155,45 +9244,45 @@ package: category: main optional: false - name: jsonschema-specifications - version: 2023.11.1 + version: 2023.11.2 manager: conda platform: linux-64 dependencies: importlib_resources: ">=1.4.0" python: ">=3.8" referencing: ">=0.31.0" - url: https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2023.11.1-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2023.11.2-pyhd8ed1ab_0.conda hash: - md5: 094ff9cf36957f95bb74cee42ab140b2 - sha256: 17ac31b620a7bb81c6468b4ba9ad4aeb1c6c6669e9dd7e4ad909da48702a6091 + md5: 73884ca36d6d96cbce498cde99fab40f + sha256: e26115d02dc208a05b557c8dd670923270803b9b3b8af4e22b93d659d1ec77ec category: main optional: false - name: jsonschema-specifications - version: 2023.11.1 + version: 2023.11.2 manager: conda platform: osx-64 dependencies: python: ">=3.8" importlib_resources: ">=1.4.0" referencing: ">=0.31.0" - url: https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2023.11.1-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2023.11.2-pyhd8ed1ab_0.conda hash: - md5: 094ff9cf36957f95bb74cee42ab140b2 - sha256: 17ac31b620a7bb81c6468b4ba9ad4aeb1c6c6669e9dd7e4ad909da48702a6091 + md5: 73884ca36d6d96cbce498cde99fab40f + sha256: e26115d02dc208a05b557c8dd670923270803b9b3b8af4e22b93d659d1ec77ec category: main optional: false - name: jsonschema-specifications - version: 
2023.11.1 + version: 2023.11.2 manager: conda platform: osx-arm64 dependencies: python: ">=3.8" importlib_resources: ">=1.4.0" referencing: ">=0.31.0" - url: https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2023.11.1-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2023.11.2-pyhd8ed1ab_0.conda hash: - md5: 094ff9cf36957f95bb74cee42ab140b2 - sha256: 17ac31b620a7bb81c6468b4ba9ad4aeb1c6c6669e9dd7e4ad909da48702a6091 + md5: 73884ca36d6d96cbce498cde99fab40f + sha256: e26115d02dc208a05b557c8dd670923270803b9b3b8af4e22b93d659d1ec77ec category: main optional: false - name: jsonschema-with-format-nongpl @@ -9314,45 +9403,45 @@ package: category: main optional: false - name: jupyter-lsp - version: 2.2.0 + version: 2.2.1 manager: conda platform: linux-64 dependencies: importlib-metadata: ">=4.8.3" jupyter_server: ">=1.1.2" python: ">=3.8" - url: https://conda.anaconda.org/conda-forge/noarch/jupyter-lsp-2.2.0-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/jupyter-lsp-2.2.1-pyhd8ed1ab_0.conda hash: - md5: 38589f4104d11f2a59ff01a9f4e3bfb3 - sha256: 16fc7b40024adece716ba7227e5c123a2deccc13f946a10d9a3270493908d11c + md5: d1a5efc65bfabc3bfebf4d3a204da897 + sha256: 0f995f60609fb50db74bed3637165ad202cf091ec0804519c11b6cffce901e88 category: main optional: false - name: jupyter-lsp - version: 2.2.0 + version: 2.2.1 manager: conda platform: osx-64 dependencies: python: ">=3.8" importlib-metadata: ">=4.8.3" jupyter_server: ">=1.1.2" - url: https://conda.anaconda.org/conda-forge/noarch/jupyter-lsp-2.2.0-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/jupyter-lsp-2.2.1-pyhd8ed1ab_0.conda hash: - md5: 38589f4104d11f2a59ff01a9f4e3bfb3 - sha256: 16fc7b40024adece716ba7227e5c123a2deccc13f946a10d9a3270493908d11c + md5: d1a5efc65bfabc3bfebf4d3a204da897 + sha256: 0f995f60609fb50db74bed3637165ad202cf091ec0804519c11b6cffce901e88 category: main optional: false - name: jupyter-lsp - version: 2.2.0 + version: 2.2.1 manager: conda platform: osx-arm64 dependencies: python: ">=3.8" importlib-metadata: ">=4.8.3" jupyter_server: ">=1.1.2" - url: https://conda.anaconda.org/conda-forge/noarch/jupyter-lsp-2.2.0-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/jupyter-lsp-2.2.1-pyhd8ed1ab_0.conda hash: - md5: 38589f4104d11f2a59ff01a9f4e3bfb3 - sha256: 16fc7b40024adece716ba7227e5c123a2deccc13f946a10d9a3270493908d11c + md5: d1a5efc65bfabc3bfebf4d3a204da897 + sha256: 0f995f60609fb50db74bed3637165ad202cf091ec0804519c11b6cffce901e88 category: main optional: false - name: jupyter-resource-usage @@ -9617,7 +9706,7 @@ package: category: main optional: false - name: jupyter_server - version: 2.10.1 + version: 2.11.1 manager: conda platform: linux-64 dependencies: @@ -9640,14 +9729,14 @@ package: tornado: ">=6.2.0" traitlets: ">=5.6.0" websocket-client: "" - url: https://conda.anaconda.org/conda-forge/noarch/jupyter_server-2.10.1-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/jupyter_server-2.11.1-pyhd8ed1ab_0.conda hash: - md5: 7d15498584d83de3b357425e37086397 - sha256: b8b55ee57785b39a9096884bfd1da3858da8f27764572321d51a3dd0a990de86 + md5: 0699b715659c026f7f81c27d0e744205 + sha256: 605825c0e2d5af7935b37319b9a46ff39e081e7a0f4dc973f0dd583f41c69ce5 category: main optional: false - name: jupyter_server - version: 2.10.1 + version: 2.11.1 manager: conda platform: osx-64 dependencies: @@ -9670,14 +9759,14 @@ package: anyio: ">=3.1.0" send2trash: ">=1.8.2" jupyter_events: ">=0.9.0" - 
url: https://conda.anaconda.org/conda-forge/noarch/jupyter_server-2.10.1-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/jupyter_server-2.11.1-pyhd8ed1ab_0.conda hash: - md5: 7d15498584d83de3b357425e37086397 - sha256: b8b55ee57785b39a9096884bfd1da3858da8f27764572321d51a3dd0a990de86 + md5: 0699b715659c026f7f81c27d0e744205 + sha256: 605825c0e2d5af7935b37319b9a46ff39e081e7a0f4dc973f0dd583f41c69ce5 category: main optional: false - name: jupyter_server - version: 2.10.1 + version: 2.11.1 manager: conda platform: osx-arm64 dependencies: @@ -9700,10 +9789,10 @@ package: anyio: ">=3.1.0" send2trash: ">=1.8.2" jupyter_events: ">=0.9.0" - url: https://conda.anaconda.org/conda-forge/noarch/jupyter_server-2.10.1-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/jupyter_server-2.11.1-pyhd8ed1ab_0.conda hash: - md5: 7d15498584d83de3b357425e37086397 - sha256: b8b55ee57785b39a9096884bfd1da3858da8f27764572321d51a3dd0a990de86 + md5: 0699b715659c026f7f81c27d0e744205 + sha256: 605825c0e2d5af7935b37319b9a46ff39e081e7a0f4dc973f0dd583f41c69ce5 category: main optional: false - name: jupyter_server_terminals @@ -12346,7 +12435,7 @@ package: krb5: ">=1.21.2,<1.22.0a0" libgcc-ng: ">=12" libzlib: ">=1.2.13,<1.3.0a0" - openssl: ">=3.1.4,<4.0a0" + openssl: ">=3.1.4,<3.2.0a0" url: https://conda.anaconda.org/conda-forge/linux-64/libpq-16.1-hfc447b1_0.conda hash: md5: 2b7f1893cf40b4ccdc0230bcd94d5ed9 @@ -12360,7 +12449,7 @@ package: dependencies: krb5: ">=1.21.2,<1.22.0a0" libzlib: ">=1.2.13,<1.3.0a0" - openssl: ">=3.1.4,<4.0a0" + openssl: ">=3.1.4,<3.2.0a0" url: https://conda.anaconda.org/conda-forge/osx-64/libpq-16.1-h6dd4ff7_0.conda hash: md5: 39de94ff4ccc306f3d24ef7aef13c689 @@ -12374,7 +12463,7 @@ package: dependencies: krb5: ">=1.21.2,<1.22.0a0" libzlib: ">=1.2.13,<1.3.0a0" - openssl: ">=3.1.4,<4.0a0" + openssl: ">=3.1.4,<3.2.0a0" url: https://conda.anaconda.org/conda-forge/osx-arm64/libpq-16.1-hd435d45_0.conda hash: md5: 883bbf64780c91608f1a7df9203b79a5 @@ -13299,25 +13388,25 @@ package: category: main optional: false - name: llvm-openmp - version: 17.0.5 + version: 17.0.6 manager: conda platform: osx-64 dependencies: {} - url: https://conda.anaconda.org/conda-forge/osx-64/llvm-openmp-17.0.5-hb6ac08f_0.conda + url: https://conda.anaconda.org/conda-forge/osx-64/llvm-openmp-17.0.6-hb6ac08f_0.conda hash: - md5: 8ca3784280b7cb54163a46e8a918fb43 - sha256: 8ad5acab5d5fb38785c6f41e17e5e1729f305f4838cc3a4470688c6cf942c0da + md5: f260ab897df05f729fc3e65dbb0850ef + sha256: 9ea2f7018f335fdc55bc9b21a388eb94ea47a243d9cbf6ec3d8862d4df9fb49b category: main optional: false - name: llvm-openmp - version: 17.0.5 + version: 17.0.6 manager: conda platform: osx-arm64 dependencies: {} - url: https://conda.anaconda.org/conda-forge/osx-arm64/llvm-openmp-17.0.5-hcd81f8e_0.conda + url: https://conda.anaconda.org/conda-forge/osx-arm64/llvm-openmp-17.0.6-hcd81f8e_0.conda hash: - md5: 7307ed345b859c2d6680d277dfc13bdd - sha256: d6ac131d98df60c85206455f49fe1921a9eeef9962bbe1f06ada22573c09b0e6 + md5: 52019d2fa0eddbbc4e6dcd30fae0c0a4 + sha256: 0c217326c5931c1416b82f98169b8a8a52139f6f5f299dbb2efa7b21f65f225a category: main optional: false - name: llvmlite @@ -14413,8 +14502,8 @@ package: entrypoints: ">=0.2.2" traitlets: ">=5.0" markupsafe: ">=2.0" - pandocfilters: ">=1.4.1" jupyter_core: ">=4.7" + pandocfilters: ">=1.4.1" nbformat: ">=5.1" pygments: ">=2.4.1" nbclient: ">=0.5.0" @@ -14441,8 +14530,8 @@ package: entrypoints: ">=0.2.2" traitlets: ">=5.0" markupsafe: ">=2.0" - pandocfilters: ">=1.4.1" 
jupyter_core: ">=4.7" + pandocfilters: ">=1.4.1" nbformat: ">=5.1" pygments: ">=2.4.1" nbclient: ">=0.5.0" @@ -14884,50 +14973,52 @@ package: category: main optional: false - name: nss - version: "3.94" + version: "3.95" manager: conda platform: linux-64 dependencies: __glibc: ">=2.17,<3.0.a0" libgcc-ng: ">=12" - libsqlite: ">=3.43.0,<4.0a0" + libsqlite: ">=3.44.2,<4.0a0" libstdcxx-ng: ">=12" libzlib: ">=1.2.13,<1.3.0a0" nspr: ">=4.35,<5.0a0" - url: https://conda.anaconda.org/conda-forge/linux-64/nss-3.94-h1d7d5a4_0.conda + url: https://conda.anaconda.org/conda-forge/linux-64/nss-3.95-h1d7d5a4_0.conda hash: - md5: 7caef74bbfa730e014b20f0852068509 - sha256: c9b7910fc554c6550905b9150f4c8230e973ca63f41b42f2c18a49e8aa458e78 + md5: d3a8067adcc45a923f4b1987c91d69da + sha256: 02d8e38b4708ce707e51084d0dff7286e6e6d24d1bf32ebbda7710fac4a0581e category: main optional: false - name: nss - version: "3.94" + version: "3.95" manager: conda platform: osx-64 dependencies: - libcxx: ">=15.0.7" - libsqlite: ">=3.43.0,<4.0a0" + __osx: ">=10.9" + libcxx: ">=16.0.6" + libsqlite: ">=3.44.2,<4.0a0" libzlib: ">=1.2.13,<1.3.0a0" nspr: ">=4.35,<5.0a0" - url: https://conda.anaconda.org/conda-forge/osx-64/nss-3.94-hd6ac835_0.conda + url: https://conda.anaconda.org/conda-forge/osx-64/nss-3.95-hfeb00ea_0.conda hash: - md5: 10c69224110baa4d7d4f1bdb03d4f383 - sha256: aafb8b2a51beaa407d4e712d11e2a34fc010c7727d8a5573fb0c7ae53f2fff75 + md5: 635525316f129644b60e3772e67fe5d8 + sha256: e57c7bb5f315868df1570d00de8ea7f03f3bc9659571111fef4f37d3f2553eb1 category: main optional: false - name: nss - version: "3.94" + version: "3.95" manager: conda platform: osx-arm64 dependencies: - libcxx: ">=15.0.7" - libsqlite: ">=3.43.0,<4.0a0" + __osx: ">=10.9" + libcxx: ">=16.0.6" + libsqlite: ">=3.44.2,<4.0a0" libzlib: ">=1.2.13,<1.3.0a0" nspr: ">=4.35,<5.0a0" - url: https://conda.anaconda.org/conda-forge/osx-arm64/nss-3.94-hc6b9969_0.conda + url: https://conda.anaconda.org/conda-forge/osx-arm64/nss-3.95-h6cf673f_0.conda hash: - md5: 4dec6b96cec24e41059c2e795755760a - sha256: 662782a095cc191c073db8e44e14bf8877252d98b1f9b69275d79c47af185bb5 + md5: 45c7275beb042f0a6655674c95abab32 + sha256: 1a7c5c5f920a63fc2aa8e3bddff9df5284dd5eedc4a3702dd479d4b970db728f category: main optional: false - name: numba @@ -15222,40 +15313,40 @@ package: category: main optional: false - name: openssl - version: 3.2.0 + version: 3.1.4 manager: conda platform: linux-64 dependencies: ca-certificates: "" libgcc-ng: ">=12" - url: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.2.0-hd590300_0.conda + url: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.4-hd590300_0.conda hash: - md5: 68223671a2b68cdf7241eb4679ab2dd4 - sha256: a8ca7c31be33894bd70bb34786d1a8c26ae650382411250b61f6b5249b69a23e + md5: 412ba6938c3e2abaca8b1129ea82e238 + sha256: d15b3e83ce66c6f6fbb4707f2f5c53337124c01fb03bfda1cf25c5b41123efc7 category: main optional: false - name: openssl - version: 3.2.0 + version: 3.1.4 manager: conda platform: osx-64 dependencies: ca-certificates: "" - url: https://conda.anaconda.org/conda-forge/osx-64/openssl-3.2.0-hd75f5a5_0.conda + url: https://conda.anaconda.org/conda-forge/osx-64/openssl-3.1.4-hd75f5a5_0.conda hash: - md5: fe12816a450e2807b99798e68a7b599d - sha256: 840aa40f278d486688198514dd70df7bd7cb0659bd63d79427bf9a7e151c5dfb + md5: bc9201da6eb1e0df4107901df5371347 + sha256: 1c436103a8de0dc82c9c56974badaa1b8b8f8cd9f37c2766bd50cd9899720f6b category: main optional: false - name: openssl - version: 3.2.0 + version: 3.1.4 manager: conda platform: 
osx-arm64 dependencies: ca-certificates: "" - url: https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.2.0-h0d3ecfb_0.conda + url: https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.1.4-h0d3ecfb_0.conda hash: - md5: 4521ffa5c81e02feb99f1c8e87301a4f - sha256: 4fb546cf566287a80658e2d778e27c241bcc5743478da6cb03bf0b4ef8226e1c + md5: 5a89552fececf4cd99628318ccbb67a3 + sha256: 3c715b1d4940c7ad6065935db18924b85a54048dde066f963cfc250340639457 category: main optional: false - name: orc @@ -17106,39 +17197,39 @@ package: category: main optional: false - name: pyasn1 - version: 0.5.0 + version: 0.5.1 manager: conda platform: linux-64 dependencies: python: "!=3.0,!=3.1,!=3.2,!=3.3,!=3.4,!=3.5" - url: https://conda.anaconda.org/conda-forge/noarch/pyasn1-0.5.0-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/pyasn1-0.5.1-pyhd8ed1ab_0.conda hash: - md5: 4b1c0db24e212190be1969b0aa490ad8 - sha256: 259b1107ae7d6983a8fdebe1717b67005fdf5328e827f33d38a9df43dee5ef82 + md5: fb1a800972b072aa4d16450983c81418 + sha256: 8b116da9acbb471e107203c11acaffcb259aca2367aa7e83e796e43ed5d381b3 category: main optional: false - name: pyasn1 - version: 0.5.0 + version: 0.5.1 manager: conda platform: osx-64 dependencies: python: "!=3.0,!=3.1,!=3.2,!=3.3,!=3.4,!=3.5" - url: https://conda.anaconda.org/conda-forge/noarch/pyasn1-0.5.0-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/pyasn1-0.5.1-pyhd8ed1ab_0.conda hash: - md5: 4b1c0db24e212190be1969b0aa490ad8 - sha256: 259b1107ae7d6983a8fdebe1717b67005fdf5328e827f33d38a9df43dee5ef82 + md5: fb1a800972b072aa4d16450983c81418 + sha256: 8b116da9acbb471e107203c11acaffcb259aca2367aa7e83e796e43ed5d381b3 category: main optional: false - name: pyasn1 - version: 0.5.0 + version: 0.5.1 manager: conda platform: osx-arm64 dependencies: python: "!=3.0,!=3.1,!=3.2,!=3.3,!=3.4,!=3.5" - url: https://conda.anaconda.org/conda-forge/noarch/pyasn1-0.5.0-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/pyasn1-0.5.1-pyhd8ed1ab_0.conda hash: - md5: 4b1c0db24e212190be1969b0aa490ad8 - sha256: 259b1107ae7d6983a8fdebe1717b67005fdf5328e827f33d38a9df43dee5ef82 + md5: fb1a800972b072aa4d16450983c81418 + sha256: 8b116da9acbb471e107203c11acaffcb259aca2367aa7e83e796e43ed5d381b3 category: main optional: false - name: pyasn1-modules @@ -17313,46 +17404,133 @@ package: category: main optional: false - name: pydantic - version: 1.10.13 + version: 2.5.2 + manager: conda + platform: linux-64 + dependencies: + annotated-types: ">=0.4.0" + pydantic-core: 2.14.5 + python: ">=3.7" + typing-extensions: ">=4.6.1" + url: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.5.2-pyhd8ed1ab_0.conda + hash: + md5: 3f908ebfccbfd09946961862d26bb9af + sha256: e3baa6424af931d8d7c5a0554b24d85faf3471df8036181d598065beed3096de + category: main + optional: false + - name: pydantic + version: 2.5.2 + manager: conda + platform: osx-64 + dependencies: + python: ">=3.7" + annotated-types: ">=0.4.0" + typing-extensions: ">=4.6.1" + pydantic-core: 2.14.5 + url: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.5.2-pyhd8ed1ab_0.conda + hash: + md5: 3f908ebfccbfd09946961862d26bb9af + sha256: e3baa6424af931d8d7c5a0554b24d85faf3471df8036181d598065beed3096de + category: main + optional: false + - name: pydantic + version: 2.5.2 + manager: conda + platform: osx-arm64 + dependencies: + python: ">=3.7" + annotated-types: ">=0.4.0" + typing-extensions: ">=4.6.1" + pydantic-core: 2.14.5 + url: 
https://conda.anaconda.org/conda-forge/noarch/pydantic-2.5.2-pyhd8ed1ab_0.conda + hash: + md5: 3f908ebfccbfd09946961862d26bb9af + sha256: e3baa6424af931d8d7c5a0554b24d85faf3471df8036181d598065beed3096de + category: main + optional: false + - name: pydantic-core + version: 2.14.5 manager: conda platform: linux-64 dependencies: libgcc-ng: ">=12" python: ">=3.11,<3.12.0a0" python_abi: 3.11.* - typing-extensions: ">=4.2.0" - url: https://conda.anaconda.org/conda-forge/linux-64/pydantic-1.10.13-py311h459d7ec_1.conda + typing-extensions: ">=4.6.0" + url: https://conda.anaconda.org/conda-forge/linux-64/pydantic-core-2.14.5-py311h46250e7_0.conda hash: - md5: 8a92f40420211897a35841861e7e8348 - sha256: f2d3a838fc90699c5dcd537aff10c78b33bd755232d0b21b26247cbf185cced7 + md5: 9b2d1233d958079649cc8f91d814e04f + sha256: c546a042316c34bf6b9c5e16da4e6993f6712554c0ac5ee3f49260260789c38f category: main optional: false - - name: pydantic - version: 1.10.13 + - name: pydantic-core + version: 2.14.5 manager: conda platform: osx-64 dependencies: python: ">=3.11,<3.12.0a0" python_abi: 3.11.* - typing-extensions: ">=4.2.0" - url: https://conda.anaconda.org/conda-forge/osx-64/pydantic-1.10.13-py311he705e18_1.conda + typing-extensions: ">=4.6.0" + url: https://conda.anaconda.org/conda-forge/osx-64/pydantic-core-2.14.5-py311h5e0f0e4_0.conda hash: - md5: ca0cd7b41964ce9a7b80290ea85e22e9 - sha256: c55ab5f7d182421a5c11f70afc32425fa192f1e40de5c301f685b25bdc3391a8 + md5: 915ef17e91fbe6c5a2d2ea502e112ae7 + sha256: d79248cd5511c1153981b7fdcad4ef72a17985e604488a3ad1f242e2cd1e2622 category: main optional: false - - name: pydantic - version: 1.10.13 + - name: pydantic-core + version: 2.14.5 manager: conda platform: osx-arm64 dependencies: python: ">=3.11,<3.12.0a0" python_abi: 3.11.* - typing-extensions: ">=4.2.0" - url: https://conda.anaconda.org/conda-forge/osx-arm64/pydantic-1.10.13-py311h05b510d_1.conda + typing-extensions: ">=4.6.0" + url: https://conda.anaconda.org/conda-forge/osx-arm64/pydantic-core-2.14.5-py311h94f323b_0.conda + hash: + md5: 781b4ba31a7ef7f3e2b4f2726dd99a81 + sha256: cac857e276d469d69af66b7f1972a8329f7843237d1b666ef7e2918cf9f83944 + category: main + optional: false + - name: pydantic-settings + version: 2.1.0 + manager: conda + platform: linux-64 + dependencies: + pydantic: ">=2.3.0" + python: ">=3.8" + python-dotenv: ">=0.21.0" + url: https://conda.anaconda.org/conda-forge/noarch/pydantic-settings-2.1.0-pyhd8ed1ab_1.conda + hash: + md5: 2a63052c1180846d4a6aaa9df7e113a3 + sha256: 2c80df34463dabec383b37dc19da48f84a1ea97f3d828d6d0dd220110da5f4e1 + category: main + optional: false + - name: pydantic-settings + version: 2.1.0 + manager: conda + platform: osx-64 + dependencies: + python: ">=3.8" + python-dotenv: ">=0.21.0" + pydantic: ">=2.3.0" + url: https://conda.anaconda.org/conda-forge/noarch/pydantic-settings-2.1.0-pyhd8ed1ab_1.conda + hash: + md5: 2a63052c1180846d4a6aaa9df7e113a3 + sha256: 2c80df34463dabec383b37dc19da48f84a1ea97f3d828d6d0dd220110da5f4e1 + category: main + optional: false + - name: pydantic-settings + version: 2.1.0 + manager: conda + platform: osx-arm64 + dependencies: + python: ">=3.8" + python-dotenv: ">=0.21.0" + pydantic: ">=2.3.0" + url: https://conda.anaconda.org/conda-forge/noarch/pydantic-settings-2.1.0-pyhd8ed1ab_1.conda hash: - md5: afdac206ecd2d91cd5478038e4cae4cf - sha256: eb7af4932468d40ef44fc595ff09f0ad5287a3ab2098b152b4b7fb1bd76782e5 + md5: 2a63052c1180846d4a6aaa9df7e113a3 + sha256: 2c80df34463dabec383b37dc19da48f84a1ea97f3d828d6d0dd220110da5f4e1 category: main 
optional: false - name: pygments @@ -18775,17 +18953,17 @@ package: category: main optional: false - name: rdma-core - version: "28.9" + version: "49.0" manager: conda platform: linux-64 dependencies: __glibc: ">=2.17,<3.0.a0" libgcc-ng: ">=12" libstdcxx-ng: ">=12" - url: https://conda.anaconda.org/conda-forge/linux-64/rdma-core-28.9-h59595ed_1.conda + url: https://conda.anaconda.org/conda-forge/linux-64/rdma-core-49.0-hd3aeb46_1.conda hash: - md5: aeffb7c06b5f65e55e6c637408dc4100 - sha256: 832f9393ab3144ce6468c6f150db9d398fad4451e96a8879afb3059f0c9902f6 + md5: 434d42b3ee35e1aaf6bb42d87730d1a4 + sha256: dca608dd54c7782f15b6a99220fa1ac8f744c1e183ba69b5d0f29c5be85865b1 category: main optional: false - name: re2 @@ -18964,45 +19142,45 @@ package: category: main optional: false - name: referencing - version: 0.31.0 + version: 0.31.1 manager: conda platform: linux-64 dependencies: attrs: ">=22.2.0" python: ">=3.8" rpds-py: ">=0.7.0" - url: https://conda.anaconda.org/conda-forge/noarch/referencing-0.31.0-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/referencing-0.31.1-pyhd8ed1ab_0.conda hash: - md5: 38c2b9b24e9a58725a233f1fa32c23e9 - sha256: 108f27bf249a581acd0f1de0e1e6a4d814ab18943178c2d9a4df02f5c16d2102 + md5: ae08039cf63eb82637b867aea3f04758 + sha256: efb91b7d2f6e729c01676e52e99071db819628a9f0a3a519c8969f0d2350a371 category: main optional: false - name: referencing - version: 0.31.0 + version: 0.31.1 manager: conda platform: osx-64 dependencies: python: ">=3.8" attrs: ">=22.2.0" rpds-py: ">=0.7.0" - url: https://conda.anaconda.org/conda-forge/noarch/referencing-0.31.0-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/referencing-0.31.1-pyhd8ed1ab_0.conda hash: - md5: 38c2b9b24e9a58725a233f1fa32c23e9 - sha256: 108f27bf249a581acd0f1de0e1e6a4d814ab18943178c2d9a4df02f5c16d2102 + md5: ae08039cf63eb82637b867aea3f04758 + sha256: efb91b7d2f6e729c01676e52e99071db819628a9f0a3a519c8969f0d2350a371 category: main optional: false - name: referencing - version: 0.31.0 + version: 0.31.1 manager: conda platform: osx-arm64 dependencies: python: ">=3.8" attrs: ">=22.2.0" rpds-py: ">=0.7.0" - url: https://conda.anaconda.org/conda-forge/noarch/referencing-0.31.0-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/referencing-0.31.1-pyhd8ed1ab_0.conda hash: - md5: 38c2b9b24e9a58725a233f1fa32c23e9 - sha256: 108f27bf249a581acd0f1de0e1e6a4d814ab18943178c2d9a4df02f5c16d2102 + md5: ae08039cf63eb82637b867aea3f04758 + sha256: efb91b7d2f6e729c01676e52e99071db819628a9f0a3a519c8969f0d2350a371 category: main optional: false - name: regex @@ -19421,43 +19599,43 @@ package: category: main optional: false - name: rpds-py - version: 0.13.1 + version: 0.13.2 manager: conda platform: linux-64 dependencies: libgcc-ng: ">=12" python: ">=3.11,<3.12.0a0" python_abi: 3.11.* - url: https://conda.anaconda.org/conda-forge/linux-64/rpds-py-0.13.1-py311h46250e7_0.conda + url: https://conda.anaconda.org/conda-forge/linux-64/rpds-py-0.13.2-py311h46250e7_0.conda hash: - md5: b1924481122f7cb41cb001f5c96bf3f6 - sha256: 014f0393f43a67b43747b070a0619f84841d4c961597c30936d264abf899c39c + md5: c5f5089dd1fe0000fecaf0d12eca50b9 + sha256: 087429b28f17d6b9df1492120c1aebf93f47441b3c071e8a06796a0502ff7ee9 category: main optional: false - name: rpds-py - version: 0.13.1 + version: 0.13.2 manager: conda platform: osx-64 dependencies: python: ">=3.11,<3.12.0a0" python_abi: 3.11.* - url: https://conda.anaconda.org/conda-forge/osx-64/rpds-py-0.13.1-py311h5e0f0e4_0.conda + url: 
https://conda.anaconda.org/conda-forge/osx-64/rpds-py-0.13.2-py311h5e0f0e4_0.conda hash: - md5: 96774911faf26609ab33aaa4246c1a06 - sha256: c591e79f21b60f5f37af31dd563f4515678f85b222b927138f94e8316cddf9e9 + md5: ca1b5674090b4f21c474bc3558dd0f72 + sha256: 4aeefa26b76d99c9aaf8f319493be42b2c016ab6b553b5ab8b3a6bb08737ff31 category: main optional: false - name: rpds-py - version: 0.13.1 + version: 0.13.2 manager: conda platform: osx-arm64 dependencies: python: ">=3.11,<3.12.0a0" python_abi: 3.11.* - url: https://conda.anaconda.org/conda-forge/osx-arm64/rpds-py-0.13.1-py311h94f323b_0.conda + url: https://conda.anaconda.org/conda-forge/osx-arm64/rpds-py-0.13.2-py311h94f323b_0.conda hash: - md5: c392d76b681cfa1ea2155233af59f947 - sha256: dd7ea62b2860ec1da543e017ad9163623befde9b16df0098b8a631c9188ad203 + md5: 78c562a4e5efd7a2859c9a3ac3b88e35 + sha256: ca2106c533a9c9bfd8b9bae3ee9059a9d40cbdcfb7cfd085286e92c5e6f87c41 category: main optional: false - name: rsa @@ -19686,42 +19864,42 @@ package: category: main optional: false - name: s3transfer - version: 0.7.0 + version: 0.8.2 manager: conda platform: linux-64 dependencies: - botocore: ">=1.12.36,<2.0a.0" + botocore: ">=1.33.2,<2.0a.0" python: ">=3.7" - url: https://conda.anaconda.org/conda-forge/noarch/s3transfer-0.7.0-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/s3transfer-0.8.2-pyhd8ed1ab_0.conda hash: - md5: 5fe335cb1420d13a818fe01310af2b80 - sha256: 5ed09d013ad7f2c2f65d1637c04ee19da242ef9bed0d86aa9faae2c48aaa255d + md5: 75e12933f4bf755c9cdd37072bcb6203 + sha256: 2e5679abcec8eb646df37518ecdbdaa224d7ff5295a1e56707317d52b47d9c79 category: main optional: false - name: s3transfer - version: 0.7.0 + version: 0.8.2 manager: conda platform: osx-64 dependencies: python: ">=3.7" - botocore: ">=1.12.36,<2.0a.0" - url: https://conda.anaconda.org/conda-forge/noarch/s3transfer-0.7.0-pyhd8ed1ab_0.conda + botocore: ">=1.33.2,<2.0a.0" + url: https://conda.anaconda.org/conda-forge/noarch/s3transfer-0.8.2-pyhd8ed1ab_0.conda hash: - md5: 5fe335cb1420d13a818fe01310af2b80 - sha256: 5ed09d013ad7f2c2f65d1637c04ee19da242ef9bed0d86aa9faae2c48aaa255d + md5: 75e12933f4bf755c9cdd37072bcb6203 + sha256: 2e5679abcec8eb646df37518ecdbdaa224d7ff5295a1e56707317d52b47d9c79 category: main optional: false - name: s3transfer - version: 0.7.0 + version: 0.8.2 manager: conda platform: osx-arm64 dependencies: python: ">=3.7" - botocore: ">=1.12.36,<2.0a.0" - url: https://conda.anaconda.org/conda-forge/noarch/s3transfer-0.7.0-pyhd8ed1ab_0.conda + botocore: ">=1.33.2,<2.0a.0" + url: https://conda.anaconda.org/conda-forge/noarch/s3transfer-0.8.2-pyhd8ed1ab_0.conda hash: - md5: 5fe335cb1420d13a818fe01310af2b80 - sha256: 5ed09d013ad7f2c2f65d1637c04ee19da242ef9bed0d86aa9faae2c48aaa255d + md5: 75e12933f4bf755c9cdd37072bcb6203 + sha256: 2e5679abcec8eb646df37518ecdbdaa224d7ff5295a1e56707317d52b47d9c79 category: main optional: false - name: scikit-learn @@ -21846,39 +22024,39 @@ package: category: main optional: false - name: traitlets - version: 5.13.0 + version: 5.14.0 manager: conda platform: linux-64 dependencies: python: ">=3.8" - url: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.13.0-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.0-pyhd8ed1ab_0.conda hash: - md5: 8a9953c15e1e5a7c1baddbbf4511a567 - sha256: 7ac67960ba2e8c16818043cc65ac6190fa4fd95f5b24357df58e4f73d5e60a10 + md5: 886f4a84ddb49b943b1697ac314e85b3 + sha256: c32412029033264140926be474d327d7fd57c0d11db9b1745396b3d4db78a799 category: main optional: false - 
name: traitlets - version: 5.13.0 + version: 5.14.0 manager: conda platform: osx-64 dependencies: python: ">=3.8" - url: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.13.0-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.0-pyhd8ed1ab_0.conda hash: - md5: 8a9953c15e1e5a7c1baddbbf4511a567 - sha256: 7ac67960ba2e8c16818043cc65ac6190fa4fd95f5b24357df58e4f73d5e60a10 + md5: 886f4a84ddb49b943b1697ac314e85b3 + sha256: c32412029033264140926be474d327d7fd57c0d11db9b1745396b3d4db78a799 category: main optional: false - name: traitlets - version: 5.13.0 + version: 5.14.0 manager: conda platform: osx-arm64 dependencies: python: ">=3.8" - url: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.13.0-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.0-pyhd8ed1ab_0.conda hash: - md5: 8a9953c15e1e5a7c1baddbbf4511a567 - sha256: 7ac67960ba2e8c16818043cc65ac6190fa4fd95f5b24357df58e4f73d5e60a10 + md5: 886f4a84ddb49b943b1697ac314e85b3 + sha256: c32412029033264140926be474d327d7fd57c0d11db9b1745396b3d4db78a799 category: main optional: false - name: typeguard @@ -22272,11 +22450,11 @@ package: libgcc-ng: ">=12" libnuma: ">=2.0.16,<3.0a0" libstdcxx-ng: ">=12" - rdma-core: ">=28.9,<29.0a0" - url: https://conda.anaconda.org/conda-forge/linux-64/ucx-1.15.0-h64cca9d_0.conda + rdma-core: ">=48.0" + url: https://conda.anaconda.org/conda-forge/linux-64/ucx-1.15.0-hae80064_1.conda hash: - md5: b35b1f1a9fdbf93266c91f297dc9060e - sha256: 8a4dce10304fee0df715addec3d078421aa7aa0824422a6630d621d15bd98e5f + md5: c0413425844278251c1cc9459386339b + sha256: f511a735bf7a0b56c5ae48839e2248d46a922ffc6ad8ea2da7617485faa70c6f category: main optional: false - name: ukkonen @@ -22330,11 +22508,11 @@ package: manager: conda platform: linux-64 dependencies: - python: "" - url: https://conda.anaconda.org/conda-forge/noarch/unicodecsv-0.14.1-py_1.tar.bz2 + python: ">=3.7" + url: https://conda.anaconda.org/conda-forge/noarch/unicodecsv-0.14.1-pyhd8ed1ab_2.conda hash: - md5: 3b2b0e9d7f73db2b5e45db113badb7f7 - sha256: 4bbf3579d57036725562ccc11c57bc487f1eb5c14c138a6881d10f34c2f04237 + md5: 21c43fb7afe9826a5da5506d45176cce + sha256: 263178c9ef2501aa8e3b7e8fe8187fd532065d04b9088e5fedcff8870ae75cf7 category: main optional: false - name: unicodecsv @@ -22342,11 +22520,11 @@ package: manager: conda platform: osx-64 dependencies: - python: "" - url: https://conda.anaconda.org/conda-forge/noarch/unicodecsv-0.14.1-py_1.tar.bz2 + python: ">=3.7" + url: https://conda.anaconda.org/conda-forge/noarch/unicodecsv-0.14.1-pyhd8ed1ab_2.conda hash: - md5: 3b2b0e9d7f73db2b5e45db113badb7f7 - sha256: 4bbf3579d57036725562ccc11c57bc487f1eb5c14c138a6881d10f34c2f04237 + md5: 21c43fb7afe9826a5da5506d45176cce + sha256: 263178c9ef2501aa8e3b7e8fe8187fd532065d04b9088e5fedcff8870ae75cf7 category: main optional: false - name: unicodecsv @@ -22354,11 +22532,11 @@ package: manager: conda platform: osx-arm64 dependencies: - python: "" - url: https://conda.anaconda.org/conda-forge/noarch/unicodecsv-0.14.1-py_1.tar.bz2 + python: ">=3.7" + url: https://conda.anaconda.org/conda-forge/noarch/unicodecsv-0.14.1-pyhd8ed1ab_2.conda hash: - md5: 3b2b0e9d7f73db2b5e45db113badb7f7 - sha256: 4bbf3579d57036725562ccc11c57bc487f1eb5c14c138a6881d10f34c2f04237 + md5: 21c43fb7afe9826a5da5506d45176cce + sha256: 263178c9ef2501aa8e3b7e8fe8187fd532065d04b9088e5fedcff8870ae75cf7 category: main optional: false - name: universal_pathlib @@ -22516,7 +22694,7 @@ package: category: main optional: false - name: uvicorn - 
version: 0.24.0 + version: 0.24.0.post1 manager: conda platform: linux-64 dependencies: @@ -22524,14 +22702,14 @@ package: h11: ">=0.8" python: ">=3.11,<3.12.0a0" python_abi: 3.11.* - url: https://conda.anaconda.org/conda-forge/linux-64/uvicorn-0.24.0-py311h38be061_0.conda + url: https://conda.anaconda.org/conda-forge/linux-64/uvicorn-0.24.0.post1-py311h38be061_0.conda hash: - md5: a6eb331b0b42251227dbdfb5838c287b - sha256: df5269d01ba7ae8fa7cc0d822a63db7a646005c689e8a90083f145a707df6035 + md5: 2b1938535dcb0385f024f6fa66eb63ad + sha256: e79e66a3baa0fc0ae6d3ca28305a5222f9f3f451d95369a9a6a8a13fbaa20eaa category: main optional: false - name: uvicorn - version: 0.24.0 + version: 0.24.0.post1 manager: conda platform: osx-64 dependencies: @@ -22539,14 +22717,14 @@ package: h11: ">=0.8" python: ">=3.11,<3.12.0a0" python_abi: 3.11.* - url: https://conda.anaconda.org/conda-forge/osx-64/uvicorn-0.24.0-py311h6eed73b_0.conda + url: https://conda.anaconda.org/conda-forge/osx-64/uvicorn-0.24.0.post1-py311h6eed73b_0.conda hash: - md5: 62249aa566e8be9286966278a6582e1a - sha256: ab7aa3875fbafd7912b97616573741508e140446fa9819ba870788677ba8fba3 + md5: 84b59bc83d504ffdceaa08a16fbb0b03 + sha256: 092a4960fff7aa0263974cfdbf0eee85d53032633293be73bcf971f259fdf869 category: main optional: false - name: uvicorn - version: 0.24.0 + version: 0.24.0.post1 manager: conda platform: osx-arm64 dependencies: @@ -22554,14 +22732,14 @@ package: h11: ">=0.8" python: ">=3.11,<3.12.0a0" python_abi: 3.11.* - url: https://conda.anaconda.org/conda-forge/osx-arm64/uvicorn-0.24.0-py311h267d04e_0.conda + url: https://conda.anaconda.org/conda-forge/osx-arm64/uvicorn-0.24.0.post1-py311h267d04e_0.conda hash: - md5: ed05fec89baaa5869db4e27bf4d510dc - sha256: 275934feb0e2cdfacd65414d8e54d3a9aa0e703f11a52ca3a0485df04a51cf77 + md5: 72fa7ae2d42c1b919afc2a05c94366da + sha256: 092af8de831585eea0c8980a06194ffcd558a41744b17028f7d81dc333726351 category: main optional: false - name: uvicorn-standard - version: 0.24.0 + version: 0.24.0.post1 manager: conda platform: linux-64 dependencies: @@ -22569,18 +22747,18 @@ package: python-dotenv: ">=0.13" python_abi: 3.11.* pyyaml: ">=5.1" - uvicorn: 0.24.0 + uvicorn: 0.24.0.post1 uvloop: ">=0.14.0,!=0.15.0,!=0.15.1" watchfiles: ">=0.13" websockets: ">=10.4" - url: https://conda.anaconda.org/conda-forge/linux-64/uvicorn-standard-0.24.0-h38be061_0.conda + url: https://conda.anaconda.org/conda-forge/linux-64/uvicorn-standard-0.24.0.post1-h38be061_0.conda hash: - md5: e8143a99cadb40ba9542e6e9ff15d862 - sha256: dc23a3aff61791522ab1d924c0f6b67468c3c72772c5ca690158c160ae42ac33 + md5: bc7779ba8fab689013281f98989f321e + sha256: c0cd1953e1bc87120dce855ee38828bfdf9c66d64dc2796c49b40cd69cfc09cd category: dev optional: true - name: uvicorn-standard - version: 0.24.0 + version: 0.24.0.post1 manager: conda platform: osx-64 dependencies: @@ -22588,18 +22766,18 @@ package: python-dotenv: ">=0.13" python_abi: 3.11.* pyyaml: ">=5.1" - uvicorn: 0.24.0 + uvicorn: 0.24.0.post1 uvloop: ">=0.14.0,!=0.15.0,!=0.15.1" watchfiles: ">=0.13" websockets: ">=10.4" - url: https://conda.anaconda.org/conda-forge/osx-64/uvicorn-standard-0.24.0-h6eed73b_0.conda + url: https://conda.anaconda.org/conda-forge/osx-64/uvicorn-standard-0.24.0.post1-h6eed73b_0.conda hash: - md5: fcfded7537383dc21fc53708048fb40f - sha256: 30476332eed1f448bfe769dcdf8a5a68e55587980026eae317c2a84b17daac2b + md5: fc3f047cd7236a5d906b828a9bbec38b + sha256: b8885240415223f1e176c8a0af7f3dd5b39cf303338425f75befb591e23c7c1d category: dev optional: true - name: uvicorn-standard 
- version: 0.24.0 + version: 0.24.0.post1 manager: conda platform: osx-arm64 dependencies: @@ -22607,14 +22785,14 @@ package: python-dotenv: ">=0.13" python_abi: 3.11.* pyyaml: ">=5.1" - uvicorn: 0.24.0 + uvicorn: 0.24.0.post1 uvloop: ">=0.14.0,!=0.15.0,!=0.15.1" watchfiles: ">=0.13" websockets: ">=10.4" - url: https://conda.anaconda.org/conda-forge/osx-arm64/uvicorn-standard-0.24.0-ha1ab1f8_0.conda + url: https://conda.anaconda.org/conda-forge/osx-arm64/uvicorn-standard-0.24.0.post1-ha1ab1f8_0.conda hash: - md5: e35093930996a0cd5668b020f880e0f2 - sha256: 391af506e734bd59d1a3b4611e27393b26ea6aa585070a63a45d4522a1fbd500 + md5: e4c1c55ae8b7aab529ecf8211f4cc4e4 + sha256: 92230fa9751494f3bc00c552803050cddf578e216421b3b7825a5d40dacea4d0 category: dev optional: true - name: uvloop @@ -22785,7 +22963,7 @@ package: category: main optional: false - name: watchfiles - version: 0.20.0 + version: 0.21.0 manager: conda platform: linux-64 dependencies: @@ -22793,38 +22971,38 @@ package: libgcc-ng: ">=12" python: ">=3.11,<3.12.0a0" python_abi: 3.11.* - url: https://conda.anaconda.org/conda-forge/linux-64/watchfiles-0.20.0-py311h46250e7_2.conda + url: https://conda.anaconda.org/conda-forge/linux-64/watchfiles-0.21.0-py311h46250e7_0.conda hash: - md5: 19667098320c065048a8e483ac860051 - sha256: 2ca7e2ebbc165401723801e9a366fb314726b375574ca635ab78527ae9363cf3 + md5: a09a506aee55efbf1d455e25c2233efe + sha256: fcacaf84e6ef05f216f8b89c8ac26ec34bf0d18117977fdebd03c80bfd74923d category: dev optional: true - name: watchfiles - version: 0.20.0 + version: 0.21.0 manager: conda platform: osx-64 dependencies: anyio: ">=3.0.0" python: ">=3.11,<3.12.0a0" python_abi: 3.11.* - url: https://conda.anaconda.org/conda-forge/osx-64/watchfiles-0.20.0-py311h299eb51_2.conda + url: https://conda.anaconda.org/conda-forge/osx-64/watchfiles-0.21.0-py311h5e0f0e4_0.conda hash: - md5: f9c3352b6007cb4d6db914f9814d0c3b - sha256: 1426317d424057bb6dedd463481202521bde50fd965940ce0b42fe71d5d20751 + md5: 17c742cd36958bb973ca0194bbba2d7b + sha256: 174ba1f4340ecbfd823314b65c80b951aa94e74c3ca7b3fbe7666362082e54fa category: dev optional: true - name: watchfiles - version: 0.20.0 + version: 0.21.0 manager: conda platform: osx-arm64 dependencies: anyio: ">=3.0.0" python: ">=3.11,<3.12.0a0" python_abi: 3.11.* - url: https://conda.anaconda.org/conda-forge/osx-arm64/watchfiles-0.20.0-py311h0563b04_2.conda + url: https://conda.anaconda.org/conda-forge/osx-arm64/watchfiles-0.21.0-py311h94f323b_0.conda hash: - md5: ea73d4e419ab5459d0a91aff3c481a82 - sha256: 6517e3498c432858c1631d20476faf87bc071eb97f252c02190472c97e87b7fe + md5: 2dfb9cc5cc18d99d737abeaad27ef8f9 + sha256: d1f5537fe556d10b915e3fb3e5f9098b10666e560ce4e6639467e4e5170009da category: dev optional: true - name: wcwidth @@ -23012,39 +23190,39 @@ package: category: dev optional: true - name: wheel - version: 0.41.3 + version: 0.42.0 manager: conda platform: linux-64 dependencies: python: ">=3.7" - url: https://conda.anaconda.org/conda-forge/noarch/wheel-0.41.3-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/wheel-0.42.0-pyhd8ed1ab_0.conda hash: - md5: 3fc026b9c87d091c4b34a6c997324ae8 - sha256: 84c3b57fba778add2bd47b7cc70e86f746d2c55549ffd2ccb6f3d6bf7c94d21d + md5: 1cdea58981c5cbc17b51973bcaddcea7 + sha256: 80be0ccc815ce22f80c141013302839b0ed938a2edb50b846cf48d8a8c1cfa01 category: main optional: false - name: wheel - version: 0.41.3 + version: 0.42.0 manager: conda platform: osx-64 dependencies: python: ">=3.7" - url: 
https://conda.anaconda.org/conda-forge/noarch/wheel-0.41.3-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/wheel-0.42.0-pyhd8ed1ab_0.conda hash: - md5: 3fc026b9c87d091c4b34a6c997324ae8 - sha256: 84c3b57fba778add2bd47b7cc70e86f746d2c55549ffd2ccb6f3d6bf7c94d21d + md5: 1cdea58981c5cbc17b51973bcaddcea7 + sha256: 80be0ccc815ce22f80c141013302839b0ed938a2edb50b846cf48d8a8c1cfa01 category: main optional: false - name: wheel - version: 0.41.3 + version: 0.42.0 manager: conda platform: osx-arm64 dependencies: python: ">=3.7" - url: https://conda.anaconda.org/conda-forge/noarch/wheel-0.41.3-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/wheel-0.42.0-pyhd8ed1ab_0.conda hash: - md5: 3fc026b9c87d091c4b34a6c997324ae8 - sha256: 84c3b57fba778add2bd47b7cc70e86f746d2c55549ffd2ccb6f3d6bf7c94d21d + md5: 1cdea58981c5cbc17b51973bcaddcea7 + sha256: 80be0ccc815ce22f80c141013302839b0ed938a2edb50b846cf48d8a8c1cfa01 category: main optional: false - name: widgetsnbextension @@ -23530,7 +23708,7 @@ package: category: main optional: false - name: yarl - version: 1.9.2 + version: 1.9.3 manager: conda platform: linux-64 dependencies: @@ -23539,14 +23717,14 @@ package: multidict: ">=4.0" python: ">=3.11,<3.12.0a0" python_abi: 3.11.* - url: https://conda.anaconda.org/conda-forge/linux-64/yarl-1.9.2-py311h459d7ec_1.conda + url: https://conda.anaconda.org/conda-forge/linux-64/yarl-1.9.3-py311h459d7ec_0.conda hash: - md5: 132637a291f818a0e99c8ca468e92eb8 - sha256: f25893b4c4e4432cdfa1c19631dd503e5f197704d2b9d09624520ece9a6845f0 + md5: 96f995652440b0a9266d66a691d9eff9 + sha256: 1e35fa508899965e2ed7866b6147330dd4e51407ce5359b74f050b0e2ef8e4d0 category: main optional: false - name: yarl - version: 1.9.2 + version: 1.9.3 manager: conda platform: osx-64 dependencies: @@ -23554,14 +23732,14 @@ package: multidict: ">=4.0" python: ">=3.11,<3.12.0a0" python_abi: 3.11.* - url: https://conda.anaconda.org/conda-forge/osx-64/yarl-1.9.2-py311he705e18_1.conda + url: https://conda.anaconda.org/conda-forge/osx-64/yarl-1.9.3-py311he705e18_0.conda hash: - md5: ac4f2406c36c333c12544f6605563188 - sha256: 2f2a68c01850a0406e2ef71c07f8f4d7a338e6a98e906a042b5b7de48ba4a558 + md5: 9e496c26a50c1bd31ec870ec26c17115 + sha256: c774eecd3a122d5f4f75be527f1f6d5031e74496ec9d34c91165169f2ef892c5 category: main optional: false - name: yarl - version: 1.9.2 + version: 1.9.3 manager: conda platform: osx-arm64 dependencies: @@ -23569,10 +23747,10 @@ package: multidict: ">=4.0" python: ">=3.11,<3.12.0a0" python_abi: 3.11.* - url: https://conda.anaconda.org/conda-forge/osx-arm64/yarl-1.9.2-py311h05b510d_1.conda + url: https://conda.anaconda.org/conda-forge/osx-arm64/yarl-1.9.3-py311h05b510d_0.conda hash: - md5: ada6c2013b3616c82f8f090871aaecdc - sha256: eccb5dc2e3c6cf23ec7ca94f591cb7ab1bd362e5eba546a4d7e2bb8c219a93ec + md5: 1de51d3ce020a415f34ba5c678c3abcc + sha256: 723e809326eccda01b2704d8c2708de901a90f720528654b8c8f298dadaad700 category: main optional: false - name: zeromq diff --git a/environments/conda-osx-64.lock.yml b/environments/conda-osx-64.lock.yml index 341cba76ed..12d78b1972 100644 --- a/environments/conda-osx-64.lock.yml +++ b/environments/conda-osx-64.lock.yml @@ -1,6 +1,6 @@ # Generated by conda-lock. 
# platform: osx-64 -# input_hash: 407bb59ef7f138fbfd77c40e38a21952ba1e13e25f2a6a4e43f5bb21abb3fcdf +# input_hash: 39e56673d0def5503b315f36cec2b3e4bfd804758ec3bcea79dad417c6b146a2 channels: - conda-forge @@ -8,12 +8,12 @@ channels: dependencies: - aws-c-common=0.9.8=h10d778d_0 - bzip2=1.0.8=h10d778d_5 - - c-ares=1.22.1=h10d778d_0 + - c-ares=1.23.0=h10d778d_0 - ca-certificates=2023.11.17=h8857fd0_0 - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 - font-ttf-inconsolata=3.000=h77eed37_0 - font-ttf-source-code-pro=2.038=h77eed37_0 - - font-ttf-ubuntu=0.83=hab24e00_0 + - font-ttf-ubuntu=0.83=h77eed37_1 - fribidi=1.0.10=hbcb3906_0 - giflib=5.2.1=hb7f2c08_3 - icu=73.2=hf5e326d_0 @@ -32,7 +32,7 @@ dependencies: - libuv=1.46.0=h0c2f820_0 - libwebp-base=1.3.2=h0dc2134_0 - libzlib=1.2.13=h8a1eda9_5 - - llvm-openmp=17.0.5=hb6ac08f_0 + - llvm-openmp=17.0.6=hb6ac08f_0 - lzo=2.10=haf1e3a3_1000 - poppler-data=0.4.12=hd8ed1ab_0 - pthread-stubs=0.4=hc929b4f_1001 @@ -70,7 +70,7 @@ dependencies: - lz4-c=1.9.4=hf0c8a7f_0 - ncurses=6.4=h93d8f39_2 - nspr=4.35=hea0b92c_0 - - openssl=3.2.0=hd75f5a5_0 + - openssl=3.1.4=hd75f5a5_0 - pandoc=3.1.3=h9d075a6_0 - pcre2=10.42=h0ad2156_0 - pixman=0.42.2=he965462_0 @@ -103,7 +103,7 @@ dependencies: - libzip=1.10.1=hc158999_3 - minizip=4.0.3=h23f18a7_0 - nodejs=20.9.0=h9adec40_0 - - nss=3.94=hd6ac835_0 + - nss=3.95=hfeb00ea_0 - readline=8.2=h9e318b2_1 - atk-1.0=2.38.0=h1d18e73_1 - aws-c-event-stream=0.3.2=hdb93a3d_7 @@ -175,7 +175,7 @@ dependencies: - humanfriendly=10.0=pyhd8ed1ab_6 - hupper=1.12=pyhd8ed1ab_0 - hyperframe=6.0.1=pyhd8ed1ab_0 - - idna=3.5=pyhd8ed1ab_0 + - idna=3.6=pyhd8ed1ab_0 - ijson=3.2.3=pyhd8ed1ab_0 - imagesize=1.4.1=pyhd8ed1ab_0 - iniconfig=2.0.0=pyhd8ed1ab_0 @@ -222,7 +222,7 @@ dependencies: - psutil=5.9.5=py311h2725bcf_1 - ptyprocess=0.7.0=pyhd3deb0d_0 - pure_eval=0.2.2=pyhd8ed1ab_0 - - pyasn1=0.5.0=pyhd8ed1ab_0 + - pyasn1=0.5.1=pyhd8ed1ab_0 - pycparser=2.21=pyhd8ed1ab_0 - pygments=2.17.2=pyhd8ed1ab_0 - pyjwt=2.8.0=pyhd8ed1ab_0 @@ -243,7 +243,7 @@ dependencies: - regex=2023.10.3=py311h2725bcf_0 - rfc3986=2.0.0=pyhd8ed1ab_0 - rfc3986-validator=0.1.1=pyh9f0ad1d_0 - - rpds-py=0.13.1=py311h5e0f0e4_0 + - rpds-py=0.13.2=py311h5e0f0e4_0 - rtree=1.1.0=py311hbc1f44b_0 - ruamel.yaml.clib=0.2.7=py311h2725bcf_2 - ruff=0.1.6=py311hec6fdf1_0 @@ -267,12 +267,12 @@ dependencies: - toolz=0.12.0=pyhd8ed1ab_0 - toposort=1.10=pyhd8ed1ab_0 - tornado=6.3.3=py311h2725bcf_1 - - traitlets=5.13.0=pyhd8ed1ab_0 + - traitlets=5.14.0=pyhd8ed1ab_0 - types-python-dateutil=2.8.19.14=pyhd8ed1ab_0 - types-pyyaml=6.0.12.12=pyhd8ed1ab_0 - typing_extensions=4.8.0=pyha770c72_0 - typing_utils=0.1.0=pyhd8ed1ab_0 - - unicodecsv=0.14.1=py_1 + - unicodecsv=0.14.1=pyhd8ed1ab_2 - uri-template=1.3.0=pyhd8ed1ab_0 - uvloop=0.19.0=py311ha272bfe_0 - validators=0.22.0=pyhd8ed1ab_0 @@ -281,7 +281,7 @@ dependencies: - webencodings=0.5.1=pyhd8ed1ab_2 - websocket-client=1.6.4=pyhd8ed1ab_0 - websockets=10.4=py311h5547dcb_1 - - wheel=0.41.3=pyhd8ed1ab_0 + - wheel=0.42.0=pyhd8ed1ab_0 - widgetsnbextension=4.0.9=pyhd8ed1ab_0 - wrapt=1.16.0=py311he705e18_0 - xlrd=2.0.1=pyhd8ed1ab_3 @@ -309,6 +309,7 @@ dependencies: - coloredlogs=14.0=pyhd8ed1ab_3 - comm=0.1.4=pyhd8ed1ab_0 - coverage=7.3.2=py311h2725bcf_0 + - curl=8.4.0=h726d00d_0 - fonttools=4.45.1=py311he705e18_0 - gitdb=4.0.11=pyhd8ed1ab_0 - graphql-core=3.2.3=pyhd8ed1ab_0 @@ -318,7 +319,7 @@ dependencies: - harfbuzz=8.3.0=hf45c392_0 - hdf5=1.14.2=nompi_hedada53_100 - html5lib=1.1=pyh9f0ad1d_0 - - hypothesis=6.90.0=pyha770c72_0 + - hypothesis=6.91.0=pyha770c72_0 - 
importlib-metadata=6.8.0=pyha770c72_0 - importlib_resources=6.1.1=pyhd8ed1ab_0 - isodate=0.6.1=pyhd8ed1ab_0 @@ -358,7 +359,7 @@ dependencies: - python-slugify=8.0.1=pyhd8ed1ab_2 - pyu2f=0.1.5=pyhd8ed1ab_0 - qtpy=2.4.1=pyhd8ed1ab_0 - - referencing=0.31.0=pyhd8ed1ab_0 + - referencing=0.31.1=pyhd8ed1ab_0 - restructuredtext_lint=1.4.0=pyhd8ed1ab_0 - rfc3339-validator=0.1.4=pyhd8ed1ab_0 - rsa=4.9=pyhd8ed1ab_0 @@ -372,14 +373,15 @@ dependencies: - urllib3=1.26.18=pyhd8ed1ab_0 - watchdog=3.0.0=py311h5ef12f2_1 - xerces-c=3.2.4=h6314983_3 - - yarl=1.9.2=py311he705e18_1 + - yarl=1.9.3=py311he705e18_0 - addfips=0.4.0=pyhd8ed1ab_1 - aniso8601=9.0.1=pyhd8ed1ab_0 + - annotated-types=0.6.0=pyhd8ed1ab_0 - argon2-cffi-bindings=21.2.0=py311h2725bcf_4 - arrow=1.3.0=pyhd8ed1ab_0 - async-timeout=4.0.3=pyhd8ed1ab_0 - aws-crt-cpp=0.24.7=hf3941dc_6 - - botocore=1.32.6=pyhd8ed1ab_0 + - botocore=1.33.5=pyhd8ed1ab_0 - branca=0.7.0=pyhd8ed1ab_1 - croniter=2.0.1=pyhd8ed1ab_0 - cryptography=41.0.5=py311hd51016d_0 @@ -393,7 +395,7 @@ dependencies: - grpcio-health-checking=1.59.2=pyhd8ed1ab_0 - httpcore=1.0.2=pyhd8ed1ab_0 - importlib_metadata=6.8.0=hd8ed1ab_0 - - jsonschema-specifications=2023.11.1=pyhd8ed1ab_0 + - jsonschema-specifications=2023.11.2=pyhd8ed1ab_0 - jupyter_core=5.5.0=py311h6eed73b_0 - jupyter_server_terminals=0.4.4=pyhd8ed1ab_1 - kealib=1.5.2=h052fcf7_1 @@ -407,7 +409,7 @@ dependencies: - prompt_toolkit=3.0.41=hd8ed1ab_0 - psycopg2-binary=2.9.7=pyhd8ed1ab_1 - pybtex=0.24.0=pyhd8ed1ab_2 - - pydantic=1.10.13=py311he705e18_1 + - pydantic-core=2.14.5=py311h5e0f0e4_0 - pyobjc-framework-cocoa=10.0=py311hf110eff_1 - pyproj=3.6.1=py311he36daed_4 - pytest-console-scripts=1.4.1=pyhd8ed1ab_0 @@ -422,12 +424,12 @@ dependencies: - starlette=0.32.0.post1=pyhd8ed1ab_0 - tiledb=2.16.3=hd3a41d5_3 - ukkonen=1.0.1=py311h5fe6e05_4 - - uvicorn=0.24.0=py311h6eed73b_0 + - uvicorn=0.24.0.post1=py311h6eed73b_0 - virtualenv=20.24.7=pyhd8ed1ab_0 - - watchfiles=0.20.0=py311h299eb51_2 + - watchfiles=0.21.0=py311h5e0f0e4_0 - aiohttp=3.8.6=py311he705e18_1 - alembic=1.12.1=pyhd8ed1ab_0 - - arelle-release=2.17.4=pyhd8ed1ab_0 + - arelle-release=2.17.7=pyhd8ed1ab_0 - argon2-cffi=23.1.0=pyhd8ed1ab_0 - aws-sdk-cpp=1.11.182=h28d282b_7 - bottleneck=1.3.7=py311h4a70a88_1 @@ -442,9 +444,9 @@ dependencies: - grpcio-status=1.59.2=pyhd8ed1ab_0 - gtk2=2.24.33=h7c1209e_2 - h3-py=3.7.6=py311hdf8f085_1 - - httpx=0.25.1=pyhd8ed1ab_0 + - httpx=0.25.2=pyhd8ed1ab_0 - identify=2.5.32=pyhd8ed1ab_0 - - ipython=8.18.0=pyh31c8845_0 + - ipython=8.18.1=pyh31011fe_1 - isoduration=20.11.0=pyhd8ed1ab_0 - jsonschema=4.20.0=pyhd8ed1ab_0 - jupyter_client=8.6.0=pyhd8ed1ab_0 @@ -456,19 +458,20 @@ dependencies: - oauthlib=3.2.2=pyhd8ed1ab_0 - pandas=2.1.3=py311h1eadf79_0 - pybtex-docutils=1.0.3=py311h6eed73b_1 + - pydantic=2.5.2=pyhd8ed1ab_0 - pyopenssl=23.3.0=pyhd8ed1ab_0 - readthedocs-sphinx-ext=2.2.3=pyhd8ed1ab_0 - requests-toolbelt=0.10.1=pyhd8ed1ab_0 - responses=0.24.1=pyhd8ed1ab_0 - - s3transfer=0.7.0=pyhd8ed1ab_0 + - s3transfer=0.8.2=pyhd8ed1ab_0 - scipy=1.11.4=py311he0bea55_0 - send2trash=1.8.2=pyhd1c38e8_0 - shapely=2.0.2=py311h4c12f3d_1 - stevedore=5.1.0=pyhd8ed1ab_0 - typeguard=4.1.5=pyhd8ed1ab_1 - typer=0.9.0=pyhd8ed1ab_0 - - uvicorn-standard=0.24.0=h6eed73b_0 - - boto3=1.29.6=pyhd8ed1ab_0 + - uvicorn-standard=0.24.0.post1=h6eed73b_0 + - boto3=1.33.5=pyhd8ed1ab_0 - cachecontrol-with-filecache=0.13.1=pyhd8ed1ab_0 - dagster=1.5.9=pyhd8ed1ab_0 - datasette=0.64.4=pyhd8ed1ab_1 @@ -477,7 +480,7 @@ dependencies: - frictionless=4.40.8=pyh6c4a22f_0 - 
gdal=3.8.0=py311h5646c56_6 - geopandas-base=0.14.1=pyha770c72_0 - - google-auth=2.23.4=pyhca7485f_0 + - google-auth=2.24.0=pyhca7485f_0 - gql-with-requests=3.4.1=pyhd8ed1ab_0 - graphviz=9.0.0=hee74176_1 - ipykernel=6.26.0=pyh3cd1d5f_0 @@ -488,13 +491,14 @@ dependencies: - nbformat=5.9.2=pyhd8ed1ab_0 - pandera-core=0.17.2=pyhd8ed1ab_1 - pre-commit=3.5.0=pyha770c72_0 + - pydantic-settings=2.1.0=pyhd8ed1ab_1 - requests-oauthlib=1.3.1=pyhd8ed1ab_0 - scikit-learn=1.3.2=py311h66081b9_1 - timezonefinder=6.2.0=py311he705e18_2 - - catalystcoop.ferc_xbrl_extractor=1.2.1=pyhd8ed1ab_0 + - catalystcoop.ferc_xbrl_extractor=1.3.1=pyhd8ed1ab_0 - conda-lock=2.5.1=pyhd8ed1ab_0 - dagster-graphql=1.5.9=pyhd8ed1ab_0 - - dagster-postgres=0.21.9=pyhd8ed1ab_0 + - dagster-postgres=0.21.9=pyhd8ed1ab_1 - fiona=1.9.5=py311h809632c_1 - google-api-core=2.14.0=pyhd8ed1ab_0 - google-auth-oauthlib=1.1.0=pyhd8ed1ab_0 @@ -519,11 +523,11 @@ dependencies: - tableschema=1.19.3=pyh9f0ad1d_0 - datapackage=1.15.2=pyh44b312d_0 - google-cloud-storage=2.13.0=pyhca7485f_0 - - jupyter_server=2.10.1=pyhd8ed1ab_0 + - jupyter_server=2.11.1=pyhd8ed1ab_0 - libarrow-substrait=14.0.1=h2cc6c1c_3_cpu - nbconvert-pandoc=7.11.0=pyhd8ed1ab_0 - gcsfs=2023.10.0=pyhd8ed1ab_0 - - jupyter-lsp=2.2.0=pyhd8ed1ab_0 + - jupyter-lsp=2.2.1=pyhd8ed1ab_0 - jupyter-resource-usage=1.0.1=pyhd8ed1ab_0 - jupyterlab_server=2.25.2=pyhd8ed1ab_0 - nbconvert=7.11.0=pyhd8ed1ab_0 diff --git a/environments/conda-osx-arm64.lock.yml b/environments/conda-osx-arm64.lock.yml index a641f4d001..4cd3f6d3c9 100644 --- a/environments/conda-osx-arm64.lock.yml +++ b/environments/conda-osx-arm64.lock.yml @@ -1,6 +1,6 @@ # Generated by conda-lock. # platform: osx-arm64 -# input_hash: 3f9ca7f82365963841501c752f82c8cd7577658349f7f07a6c4b77f764b97b41 +# input_hash: c275fe8ff3012ad83a98252ba570f9b278f142720b73f42abfd4c4a1107f67e2 channels: - conda-forge @@ -8,12 +8,12 @@ channels: dependencies: - aws-c-common=0.9.8=h93a5062_0 - bzip2=1.0.8=h93a5062_5 - - c-ares=1.22.1=h93a5062_0 + - c-ares=1.23.0=h93a5062_0 - ca-certificates=2023.11.17=hf0a4a13_0 - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 - font-ttf-inconsolata=3.000=h77eed37_0 - font-ttf-source-code-pro=2.038=h77eed37_0 - - font-ttf-ubuntu=0.83=hab24e00_0 + - font-ttf-ubuntu=0.83=h77eed37_1 - fribidi=1.0.10=h27ca646_0 - giflib=5.2.1=h1a8c8d9_3 - icu=73.2=hc8870d7_0 @@ -32,7 +32,7 @@ dependencies: - libuv=1.46.0=hb547adb_0 - libwebp-base=1.3.2=hb547adb_0 - libzlib=1.2.13=h53f4e23_5 - - llvm-openmp=17.0.5=hcd81f8e_0 + - llvm-openmp=17.0.6=hcd81f8e_0 - lzo=2.10=h642e427_1000 - pandoc=3.1.3=hce30654_0 - poppler-data=0.4.12=hd8ed1ab_0 @@ -71,7 +71,7 @@ dependencies: - lz4-c=1.9.4=hb7217d7_0 - ncurses=6.4=h463b476_2 - nspr=4.35=hb7217d7_0 - - openssl=3.2.0=h0d3ecfb_0 + - openssl=3.1.4=h0d3ecfb_0 - pcre2=10.42=h26f9a81_0 - pixman=0.42.2=h13dd4ca_0 - snappy=1.1.10=h17c5cce_0 @@ -103,7 +103,7 @@ dependencies: - libzip=1.10.1=ha0bc3c6_3 - minizip=4.0.3=hd5cad61_0 - nodejs=20.9.0=h0950e01_0 - - nss=3.94=hc6b9969_0 + - nss=3.95=h6cf673f_0 - readline=8.2=h92ec313_1 - atk-1.0=2.38.0=hcb7b3dd_1 - aws-c-event-stream=0.3.2=h0574dc0_7 @@ -175,7 +175,7 @@ dependencies: - humanfriendly=10.0=pyhd8ed1ab_6 - hupper=1.12=pyhd8ed1ab_0 - hyperframe=6.0.1=pyhd8ed1ab_0 - - idna=3.5=pyhd8ed1ab_0 + - idna=3.6=pyhd8ed1ab_0 - ijson=3.2.3=pyhd8ed1ab_0 - imagesize=1.4.1=pyhd8ed1ab_0 - iniconfig=2.0.0=pyhd8ed1ab_0 @@ -222,7 +222,7 @@ dependencies: - psutil=5.9.5=py311heffc1b2_1 - ptyprocess=0.7.0=pyhd3deb0d_0 - pure_eval=0.2.2=pyhd8ed1ab_0 - - pyasn1=0.5.0=pyhd8ed1ab_0 + - 
pyasn1=0.5.1=pyhd8ed1ab_0 - pycparser=2.21=pyhd8ed1ab_0 - pygments=2.17.2=pyhd8ed1ab_0 - pyjwt=2.8.0=pyhd8ed1ab_0 @@ -243,7 +243,7 @@ dependencies: - regex=2023.10.3=py311heffc1b2_0 - rfc3986=2.0.0=pyhd8ed1ab_0 - rfc3986-validator=0.1.1=pyh9f0ad1d_0 - - rpds-py=0.13.1=py311h94f323b_0 + - rpds-py=0.13.2=py311h94f323b_0 - rtree=1.1.0=py311hd698ff7_0 - ruamel.yaml.clib=0.2.7=py311heffc1b2_2 - ruff=0.1.6=py311h6fc163c_0 @@ -267,12 +267,12 @@ dependencies: - toolz=0.12.0=pyhd8ed1ab_0 - toposort=1.10=pyhd8ed1ab_0 - tornado=6.3.3=py311heffc1b2_1 - - traitlets=5.13.0=pyhd8ed1ab_0 + - traitlets=5.14.0=pyhd8ed1ab_0 - types-python-dateutil=2.8.19.14=pyhd8ed1ab_0 - types-pyyaml=6.0.12.12=pyhd8ed1ab_0 - typing_extensions=4.8.0=pyha770c72_0 - typing_utils=0.1.0=pyhd8ed1ab_0 - - unicodecsv=0.14.1=py_1 + - unicodecsv=0.14.1=pyhd8ed1ab_2 - uri-template=1.3.0=pyhd8ed1ab_0 - uvloop=0.19.0=py311h05b510d_0 - validators=0.22.0=pyhd8ed1ab_0 @@ -281,7 +281,7 @@ dependencies: - webencodings=0.5.1=pyhd8ed1ab_2 - websocket-client=1.6.4=pyhd8ed1ab_0 - websockets=10.4=py311he2be06e_1 - - wheel=0.41.3=pyhd8ed1ab_0 + - wheel=0.42.0=pyhd8ed1ab_0 - widgetsnbextension=4.0.9=pyhd8ed1ab_0 - wrapt=1.16.0=py311h05b510d_0 - xlrd=2.0.1=pyhd8ed1ab_3 @@ -309,6 +309,7 @@ dependencies: - coloredlogs=14.0=pyhd8ed1ab_3 - comm=0.1.4=pyhd8ed1ab_0 - coverage=7.3.2=py311heffc1b2_0 + - curl=8.4.0=h2d989ff_0 - fonttools=4.45.1=py311h05b510d_0 - gitdb=4.0.11=pyhd8ed1ab_0 - graphql-core=3.2.3=pyhd8ed1ab_0 @@ -318,7 +319,7 @@ dependencies: - harfbuzz=8.3.0=h8f0ba13_0 - hdf5=1.14.2=nompi_h3aba7b3_100 - html5lib=1.1=pyh9f0ad1d_0 - - hypothesis=6.90.0=pyha770c72_0 + - hypothesis=6.91.0=pyha770c72_0 - importlib-metadata=6.8.0=pyha770c72_0 - importlib_resources=6.1.1=pyhd8ed1ab_0 - isodate=0.6.1=pyhd8ed1ab_0 @@ -358,7 +359,7 @@ dependencies: - python-slugify=8.0.1=pyhd8ed1ab_2 - pyu2f=0.1.5=pyhd8ed1ab_0 - qtpy=2.4.1=pyhd8ed1ab_0 - - referencing=0.31.0=pyhd8ed1ab_0 + - referencing=0.31.1=pyhd8ed1ab_0 - restructuredtext_lint=1.4.0=pyhd8ed1ab_0 - rfc3339-validator=0.1.4=pyhd8ed1ab_0 - rsa=4.9=pyhd8ed1ab_0 @@ -372,14 +373,15 @@ dependencies: - urllib3=1.26.18=pyhd8ed1ab_0 - watchdog=3.0.0=py311heffc1b2_1 - xerces-c=3.2.4=hd886eac_3 - - yarl=1.9.2=py311h05b510d_1 + - yarl=1.9.3=py311h05b510d_0 - addfips=0.4.0=pyhd8ed1ab_1 - aniso8601=9.0.1=pyhd8ed1ab_0 + - annotated-types=0.6.0=pyhd8ed1ab_0 - argon2-cffi-bindings=21.2.0=py311heffc1b2_4 - arrow=1.3.0=pyhd8ed1ab_0 - async-timeout=4.0.3=pyhd8ed1ab_0 - aws-crt-cpp=0.24.7=hba4ac3b_6 - - botocore=1.32.6=pyhd8ed1ab_0 + - botocore=1.33.5=pyhd8ed1ab_0 - branca=0.7.0=pyhd8ed1ab_1 - croniter=2.0.1=pyhd8ed1ab_0 - cryptography=41.0.5=py311h71175c2_0 @@ -393,7 +395,7 @@ dependencies: - grpcio-health-checking=1.59.2=pyhd8ed1ab_0 - httpcore=1.0.2=pyhd8ed1ab_0 - importlib_metadata=6.8.0=hd8ed1ab_0 - - jsonschema-specifications=2023.11.1=pyhd8ed1ab_0 + - jsonschema-specifications=2023.11.2=pyhd8ed1ab_0 - jupyter_core=5.5.0=py311h267d04e_0 - jupyter_server_terminals=0.4.4=pyhd8ed1ab_1 - kealib=1.5.2=h47b5e36_1 @@ -407,7 +409,7 @@ dependencies: - prompt_toolkit=3.0.41=hd8ed1ab_0 - psycopg2-binary=2.9.7=pyhd8ed1ab_1 - pybtex=0.24.0=pyhd8ed1ab_2 - - pydantic=1.10.13=py311h05b510d_1 + - pydantic-core=2.14.5=py311h94f323b_0 - pyobjc-framework-cocoa=10.0=py311hb702dc4_1 - pyproj=3.6.1=py311h20a9b75_4 - pytest-console-scripts=1.4.1=pyhd8ed1ab_0 @@ -422,12 +424,12 @@ dependencies: - starlette=0.32.0.post1=pyhd8ed1ab_0 - tiledb=2.16.3=he15c4da_3 - ukkonen=1.0.1=py311he4fd1f5_4 - - uvicorn=0.24.0=py311h267d04e_0 + - 
uvicorn=0.24.0.post1=py311h267d04e_0 - virtualenv=20.24.7=pyhd8ed1ab_0 - - watchfiles=0.20.0=py311h0563b04_2 + - watchfiles=0.21.0=py311h94f323b_0 - aiohttp=3.8.6=py311h05b510d_1 - alembic=1.12.1=pyhd8ed1ab_0 - - arelle-release=2.17.4=pyhd8ed1ab_0 + - arelle-release=2.17.7=pyhd8ed1ab_0 - argon2-cffi=23.1.0=pyhd8ed1ab_0 - aws-sdk-cpp=1.11.182=h31542fa_7 - bottleneck=1.3.7=py311hb49d859_1 @@ -442,9 +444,9 @@ dependencies: - grpcio-status=1.59.2=pyhd8ed1ab_0 - gtk2=2.24.33=h57013de_2 - h3-py=3.7.6=py311ha891d26_1 - - httpx=0.25.1=pyhd8ed1ab_0 + - httpx=0.25.2=pyhd8ed1ab_0 - identify=2.5.32=pyhd8ed1ab_0 - - ipython=8.18.0=pyh31c8845_0 + - ipython=8.18.1=pyh31011fe_1 - isoduration=20.11.0=pyhd8ed1ab_0 - jsonschema=4.20.0=pyhd8ed1ab_0 - jupyter_client=8.6.0=pyhd8ed1ab_0 @@ -456,19 +458,20 @@ dependencies: - oauthlib=3.2.2=pyhd8ed1ab_0 - pandas=2.1.3=py311h6e08293_0 - pybtex-docutils=1.0.3=py311h267d04e_1 + - pydantic=2.5.2=pyhd8ed1ab_0 - pyopenssl=23.3.0=pyhd8ed1ab_0 - readthedocs-sphinx-ext=2.2.3=pyhd8ed1ab_0 - requests-toolbelt=0.10.1=pyhd8ed1ab_0 - responses=0.24.1=pyhd8ed1ab_0 - - s3transfer=0.7.0=pyhd8ed1ab_0 + - s3transfer=0.8.2=pyhd8ed1ab_0 - scipy=1.11.4=py311h2b215a9_0 - send2trash=1.8.2=pyhd1c38e8_0 - shapely=2.0.2=py311h0815064_1 - stevedore=5.1.0=pyhd8ed1ab_0 - typeguard=4.1.5=pyhd8ed1ab_1 - typer=0.9.0=pyhd8ed1ab_0 - - uvicorn-standard=0.24.0=ha1ab1f8_0 - - boto3=1.29.6=pyhd8ed1ab_0 + - uvicorn-standard=0.24.0.post1=ha1ab1f8_0 + - boto3=1.33.5=pyhd8ed1ab_0 - cachecontrol-with-filecache=0.13.1=pyhd8ed1ab_0 - dagster=1.5.9=pyhd8ed1ab_0 - datasette=0.64.4=pyhd8ed1ab_1 @@ -477,7 +480,7 @@ dependencies: - frictionless=4.40.8=pyh6c4a22f_0 - gdal=3.8.0=py311h32a4f3d_6 - geopandas-base=0.14.1=pyha770c72_0 - - google-auth=2.23.4=pyhca7485f_0 + - google-auth=2.24.0=pyhca7485f_0 - gql-with-requests=3.4.1=pyhd8ed1ab_0 - graphviz=9.0.0=h3face73_1 - ipykernel=6.26.0=pyh3cd1d5f_0 @@ -488,13 +491,14 @@ dependencies: - nbformat=5.9.2=pyhd8ed1ab_0 - pandera-core=0.17.2=pyhd8ed1ab_1 - pre-commit=3.5.0=pyha770c72_0 + - pydantic-settings=2.1.0=pyhd8ed1ab_1 - requests-oauthlib=1.3.1=pyhd8ed1ab_0 - scikit-learn=1.3.2=py311ha25ca4d_1 - timezonefinder=6.2.0=py311h05b510d_2 - - catalystcoop.ferc_xbrl_extractor=1.2.1=pyhd8ed1ab_0 + - catalystcoop.ferc_xbrl_extractor=1.3.1=pyhd8ed1ab_0 - conda-lock=2.5.1=pyhd8ed1ab_0 - dagster-graphql=1.5.9=pyhd8ed1ab_0 - - dagster-postgres=0.21.9=pyhd8ed1ab_0 + - dagster-postgres=0.21.9=pyhd8ed1ab_1 - fiona=1.9.5=py311h4760b73_1 - google-api-core=2.14.0=pyhd8ed1ab_0 - google-auth-oauthlib=1.1.0=pyhd8ed1ab_0 @@ -519,11 +523,11 @@ dependencies: - tableschema=1.19.3=pyh9f0ad1d_0 - datapackage=1.15.2=pyh44b312d_0 - google-cloud-storage=2.13.0=pyhca7485f_0 - - jupyter_server=2.10.1=pyhd8ed1ab_0 + - jupyter_server=2.11.1=pyhd8ed1ab_0 - libarrow-substrait=14.0.1=h594d712_3_cpu - nbconvert-pandoc=7.11.0=pyhd8ed1ab_0 - gcsfs=2023.10.0=pyhd8ed1ab_0 - - jupyter-lsp=2.2.0=pyhd8ed1ab_0 + - jupyter-lsp=2.2.1=pyhd8ed1ab_0 - jupyter-resource-usage=1.0.1=pyhd8ed1ab_0 - jupyterlab_server=2.25.2=pyhd8ed1ab_0 - nbconvert=7.11.0=pyhd8ed1ab_0 diff --git a/migrations/versions/7febe79b8760_add_utility_type_to_liabilities_side.py b/migrations/versions/7febe79b8760_add_utility_type_to_liabilities_side.py new file mode 100644 index 0000000000..91c942f415 --- /dev/null +++ b/migrations/versions/7febe79b8760_add_utility_type_to_liabilities_side.py @@ -0,0 +1,89 @@ +"""add utility type to liabilities side + +Revision ID: 7febe79b8760 +Revises: ad3e7ecdcfc3 +Create Date: 2023-09-28 11:10:01.521413 + +""" 
+import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "7febe79b8760" +down_revision = "ad3e7ecdcfc3" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table( + "balance_sheet_liabilities_ferc1", schema=None + ) as batch_op: + batch_op.add_column( + sa.Column( + "utility_type", + sa.Text(), + nullable=True, + comment="Listing of utility plant types. Examples include Electric Utility, Gas Utility, and Other Utility.", + ) + ) + + with op.batch_alter_table( + "denorm_balance_sheet_liabilities_ferc1", schema=None + ) as batch_op: + batch_op.add_column( + sa.Column( + "utility_type", + sa.Text(), + nullable=True, + comment="Listing of utility plant types. Examples include Electric Utility, Gas Utility, and Other Utility.", + ) + ) + + with op.batch_alter_table( + "denorm_retained_earnings_ferc1", schema=None + ) as batch_op: + batch_op.add_column( + sa.Column( + "utility_type", + sa.Text(), + nullable=True, + comment="Listing of utility plant types. Examples include Electric Utility, Gas Utility, and Other Utility.", + ) + ) + + with op.batch_alter_table("retained_earnings_ferc1", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "utility_type", + sa.Text(), + nullable=True, + comment="Listing of utility plant types. Examples include Electric Utility, Gas Utility, and Other Utility.", + ) + ) + + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("retained_earnings_ferc1", schema=None) as batch_op: + batch_op.drop_column("utility_type") + + with op.batch_alter_table( + "denorm_retained_earnings_ferc1", schema=None + ) as batch_op: + batch_op.drop_column("utility_type") + + with op.batch_alter_table( + "denorm_balance_sheet_liabilities_ferc1", schema=None + ) as batch_op: + batch_op.drop_column("utility_type") + + with op.batch_alter_table( + "balance_sheet_liabilities_ferc1", schema=None + ) as batch_op: + batch_op.drop_column("utility_type") + + # ### end Alembic commands ### diff --git a/migrations/versions/ad3e7ecdcfc3_add_utility_type.py b/migrations/versions/ad3e7ecdcfc3_add_utility_type.py new file mode 100644 index 0000000000..87288303b5 --- /dev/null +++ b/migrations/versions/ad3e7ecdcfc3_add_utility_type.py @@ -0,0 +1,81 @@ +"""Add utility type + +Revision ID: ad3e7ecdcfc3 +Revises: 3313ca078f4e +Create Date: 2023-09-27 15:06:27.671649 + +""" +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "ad3e7ecdcfc3" +down_revision = "3313ca078f4e" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("balance_sheet_assets_ferc1", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "utility_type", + sa.Text(), + nullable=True, + comment="Listing of utility plant types. Examples include Electric Utility, Gas Utility, and Other Utility.", + ) + ) + + with op.batch_alter_table( + "denorm_balance_sheet_assets_ferc1", schema=None + ) as batch_op: + batch_op.add_column( + sa.Column( + "utility_type", + sa.Text(), + nullable=True, + comment="Listing of utility plant types. 
Examples include Electric Utility, Gas Utility, and Other Utility.", + ) + ) + + with op.batch_alter_table("denorm_plant_in_service_ferc1", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "plant_status", + sa.Text(), + nullable=True, + comment="Utility plant financial status (in service, future, leased, total).", + ) + ) + + with op.batch_alter_table("plant_in_service_ferc1", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "plant_status", + sa.Text(), + nullable=True, + comment="Utility plant financial status (in service, future, leased, total).", + ) + ) + + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("plant_in_service_ferc1", schema=None) as batch_op: + batch_op.drop_column("plant_status") + + with op.batch_alter_table("denorm_plant_in_service_ferc1", schema=None) as batch_op: + batch_op.drop_column("plant_status") + + with op.batch_alter_table( + "denorm_balance_sheet_assets_ferc1", schema=None + ) as batch_op: + batch_op.drop_column("utility_type") + + with op.batch_alter_table("balance_sheet_assets_ferc1", schema=None) as batch_op: + batch_op.drop_column("utility_type") + + # ### end Alembic commands ### diff --git a/pyproject.toml b/pyproject.toml index 1d250f22e5..7a970bd38b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] build-backend = "setuptools.build_meta" -requires = ["setuptools>=66,<69", "setuptools_scm[toml]>=3.5.0", "wheel"] +requires = ["setuptools>=66", "setuptools_scm[toml]>=3.5.0", "wheel"] [project] name = "catalystcoop.pudl" @@ -18,6 +18,7 @@ dependencies = [ "build>=1.0", "catalystcoop.dbfread>=3.0,<3.1", "catalystcoop.ferc-xbrl-extractor>=1.2.0,<2", + "click>=8.1", "coloredlogs>=14.0", # Dagster requires 14.0 "conda-lock>=2.5.1", "coverage>=7", @@ -50,10 +51,8 @@ dependencies = [ "pandera>=0.17.2", "pre-commit>=3", "pyarrow>=14.0.1", # pandas[parquet] - "pydantic>=1.10,<2", - # Required after pandera-core is retired and we switch to pydantic v2 - #"pydantic>=2.4", - #"pydantic-settings>=2", + "pydantic>=2.4", + "pydantic-settings>=2", "pytest>=7.4", "pytest-cov>=4.1", "pytest-console-scripts>=1.4", @@ -220,10 +219,6 @@ exclude = ["migrations/versions/*"] "test/*" = ["D"] "migrations/*" = ["D", "Q"] -[tool.ruff.pep8-naming] -# Allow Pydantic's `@validator` decorator to trigger class method treatment. 
-classmethod-decorators = ["pydantic.validator", "pydantic.root_validator"] - [tool.ruff.isort] known-first-party = ["pudl"] @@ -258,6 +253,15 @@ filterwarnings = [ "ignore:Subclassing validator classes:DeprecationWarning:tableschema", "ignore:The Shapely GEOS version:UserWarning:geopandas[.*]", "ignore:Unknown extension:UserWarning:openpyxl.worksheet[.*]", + "ignore:The `__fields__` attribute is deprecated:pydantic.PydanticDeprecatedSince20:unittest.mock", + "ignore:The `__fields_set__` attribute is deprecated:pydantic.PydanticDeprecatedSince20:unittest.mock", + "ignore:The `__fields__` attribute is deprecated:pydantic.PydanticDeprecatedSince20:pydantic.main", + "ignore:The `__fields_set__` attribute is deprecated:pydantic.PydanticDeprecatedSince20:pydantic.main", + "ignore:The `update_forward_refs` method is deprecated:pydantic.PydanticDeprecatedSince20:pydantic.main", + "ignore:Support for class-based `config` is deprecated:pydantic.PydanticDeprecatedSince20:pydantic._internal._config", + "ignore:Pydantic V1 style `@validator` validators are deprecated:pydantic.PydanticDeprecatedSince20:ferc_xbrl_extractor.instance", + "ignore:The `update_forward_refs` method is deprecated:pydantic.PydanticDeprecatedSince20:ferc_xbrl_extractor.taxonomy", + "once:Could not infer format, so each element will be parsed individually, falling back to `dateutil`.:UserWarning:pudl.extract.eia_bulk_elec", "once:In a future version:FutureWarning:pudl.helpers", "once:open_binary is deprecated:DeprecationWarning:pudl.glue.ferc1_eia", "once:open_text is deprecated:DeprecationWarning:pudl.glue.ferc1_eia", @@ -295,3 +299,4 @@ pip = ">=23" prettier = ">=3.0" python = ">=3.11,<3.12" sqlite = ">=3.43" +curl = ">=8.4.0" diff --git a/src/pudl/__init__.py b/src/pudl/__init__.py index 17048a6e3c..a578d8f855 100644 --- a/src/pudl/__init__.py +++ b/src/pudl/__init__.py @@ -8,6 +8,7 @@ convert, etl, extract, + ferc_to_sqlite, glue, helpers, io_managers, diff --git a/src/pudl/analysis/timeseries_cleaning.py b/src/pudl/analysis/timeseries_cleaning.py index 9d85fe9db9..3ec0ee2651 100644 --- a/src/pudl/analysis/timeseries_cleaning.py +++ b/src/pudl/analysis/timeseries_cleaning.py @@ -1096,7 +1096,7 @@ def summarize_flags(self) -> pd.DataFrame: ) df = pd.concat(stats, names=["column", "flag"]).reset_index() # Sort flags by flagged order - ordered = df["flag"].astype(pd.CategoricalDtype(pd.unique(self.flagged))) + ordered = df["flag"].astype(pd.CategoricalDtype(set(self.flagged))) return df.assign(flag=ordered).sort_values(["column", "flag"]) def plot_flags(self, name: Any = 0) -> None: diff --git a/src/pudl/cli/etl.py b/src/pudl/cli/etl.py index 604c754ee8..9972ff1ff3 100644 --- a/src/pudl/cli/etl.py +++ b/src/pudl/cli/etl.py @@ -26,7 +26,7 @@ ) import pudl -from pudl.settings import EtlSettings +from pudl.settings import EpaCemsSettings, EtlSettings from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -116,7 +116,7 @@ def main(): etl_settings = EtlSettings.from_yaml(args.settings_file) - dataset_settings_config = etl_settings.datasets.dict() + dataset_settings_config = etl_settings.datasets.model_dump() process_epacems = True if etl_settings.datasets.epacems is None: process_epacems = False @@ -124,7 +124,7 @@ def main(): # the CEMS assets will not be executed. Fill in the config dictionary # with default cems values. Replace this workaround once dagster pydantic # config classes are available. 
- dataset_settings_config["epacems"] = pudl.settings.EpaCemsSettings().dict() + dataset_settings_config["epacems"] = EpaCemsSettings().model_dump() pudl_etl_reconstructable_job = build_reconstructable_job( "pudl.cli.etl", diff --git a/src/pudl/convert/censusdp1tract_to_sqlite.py b/src/pudl/convert/censusdp1tract_to_sqlite.py index 81dd0ce399..12ae282386 100644 --- a/src/pudl/convert/censusdp1tract_to_sqlite.py +++ b/src/pudl/convert/censusdp1tract_to_sqlite.py @@ -1,17 +1,10 @@ """Convert the US Census DP1 ESRI GeoDatabase into an SQLite Database. -This is a thin wrapper around the GDAL ogr2ogr command line tool. We use it -to convert the Census DP1 data which is distributed as an ESRI GeoDB into an -SQLite DB. The module provides ogr2ogr with the Census DP 1 data from the -PUDL datastore, and directs it to be output into the user's SQLite directory -alongside our other SQLite Databases (ferc1.sqlite and pudl.sqlite) - -Note that the ogr2ogr command line utility must be available on the user's -system for this to work. This tool is part of the ``pudl-dev`` conda -environment, but if you are using PUDL outside of the conda environment, you -will need to install ogr2ogr separately. On Debian Linux based systems such -as Ubuntu it can be installed with ``sudo apt-get install gdal-bin`` (which -is what we do in our CI setup and Docker images.) +This is a thin wrapper around the GDAL ogr2ogr command line tool. We use it to convert +the Census DP1 data which is distributed as an ESRI GeoDB into an SQLite DB. The module +provides ogr2ogr with the Census DP 1 data from the PUDL datastore, and writes the +output into the output directory indicated by the ``$PUDL_OUTPUT`` environment variable +alongside the FERC and PUDL SQLite databases. """ import os diff --git a/src/pudl/etl/__init__.py b/src/pudl/etl/__init__.py index e62d1003c7..bb8fcddc1a 100644 --- a/src/pudl/etl/__init__.py +++ b/src/pudl/etl/__init__.py @@ -21,6 +21,7 @@ from pudl.settings import EtlSettings from . import ( + check_foreign_keys, eia_bulk_elec_assets, epacems_assets, glue_assets, @@ -115,7 +116,7 @@ def load_dataset_settings_from_file(setting_filename: str) -> dict: dataset_settings = EtlSettings.from_yaml( importlib.resources.files("pudl.package_data.settings") / f"{setting_filename}.yml" - ).datasets.dict() + ).datasets.model_dump() return dataset_settings diff --git a/src/pudl/etl/analysis_assets.py b/src/pudl/etl/analysis_assets.py deleted file mode 100644 index a605000bb9..0000000000 --- a/src/pudl/etl/analysis_assets.py +++ /dev/null @@ -1,34 +0,0 @@ -"""Derived / analysis assets that aren't simple to construct. - -This is really too large & generic of a category. Should we have an asset group for each -set of related analyses? E.g. - -* mcoe_assets -* service_territory_assets -* heat_rate_assets -* state_demand_assets -* depreciation_assets -* plant_parts_eia_assets -* ferc1_eia_record_linkage_assets - -Not sure what the right organization is but they'll be defined across a bunch of -different modules. Eventually I imagine these would just be the novel derived values, -probably in pretty skinny tables, which get joined / aggregated with other data in the -denormalized tables. -""" -import pandas as pd -from dagster import asset - -import pudl - -logger = pudl.logging_helpers.get_logger(__name__) - - -@asset(io_manager_key="pudl_sqlite_io_manager", compute_kind="Python") -def utility_analysis(utils_eia860: pd.DataFrame) -> pd.DataFrame: - """Example of how to create an analysis table that depends on an output view. 
- - This final dataframe will be written to the database (without a schema). - """ - # Do some analysis on utils_eia860 - return utils_eia860 diff --git a/src/pudl/extract/dbf.py b/src/pudl/extract/dbf.py index e48b9c3f25..1c296aaea6 100644 --- a/src/pudl/extract/dbf.py +++ b/src/pudl/extract/dbf.py @@ -2,6 +2,7 @@ import contextlib import csv import importlib.resources +import warnings import zipfile from collections import defaultdict from collections.abc import Iterator @@ -533,7 +534,14 @@ def aggregate_table_frames( """ if not dfs: return None - return pd.concat([df.df for df in dfs]) + with warnings.catch_warnings(): + warnings.filterwarnings( + action="ignore", + category=FutureWarning, + message="The behavior of DataFrame concatenation with empty or all-NA entries is deprecated", + ) + aggregated_df = pd.concat([df.df for df in dfs]) + return aggregated_df def load_table_data(self): """Loads all tables from fox pro database and writes them to sqlite.""" diff --git a/src/pudl/extract/eia_bulk_elec.py b/src/pudl/extract/eia_bulk_elec.py index cf8491974d..d19e5adfbd 100644 --- a/src/pudl/extract/eia_bulk_elec.py +++ b/src/pudl/extract/eia_bulk_elec.py @@ -11,6 +11,7 @@ tables: one of metadata and one of timeseries. That is the format delivered by this module. """ +import warnings from io import BytesIO from pathlib import Path @@ -69,14 +70,23 @@ def _parse_data_column(elec_df: pd.DataFrame) -> pd.DataFrame: is_monthly = ( data_df.iloc[0:5, data_df.columns.get_loc("date")].str.match(r"\d{6}").all() ) - if is_monthly: - data_df.loc[:, "date"] = pd.to_datetime( - data_df.loc[:, "date"], format="%Y%m", errors="raise" - ) - else: - data_df.loc[:, "date"] = pd.to_datetime( - data_df.loc[:, "date"], errors="raise" + # Unfortunately, the date formats in the EIA bulk electricity data are not + # uniform, and so for now we need to fall back on dateutil. Currently this + # warning is emitted thousands of times, clogging up the logs. 
+ with warnings.catch_warnings(): + warnings.filterwarnings( + action="ignore", + message="Could not infer format, so each element will be parsed individually", + category=UserWarning, ) + if is_monthly: + data_df.loc[:, "date"] = pd.to_datetime( + data_df.loc[:, "date"], format="%Y%m", errors="raise" + ) + else: + data_df.loc[:, "date"] = pd.to_datetime( + data_df.loc[:, "date"], errors="raise" + ) data_df["series_id"] = elec_df.at[idx, "series_id"] out.append(data_df) out = pd.concat(out, ignore_index=True, axis=0) diff --git a/src/pudl/extract/excel.py b/src/pudl/extract/excel.py index 7843798d69..f9e9a5b0ea 100644 --- a/src/pudl/extract/excel.py +++ b/src/pudl/extract/excel.py @@ -2,6 +2,7 @@ import importlib.resources import pathlib from collections import defaultdict +from io import BytesIO import dbfread import pandas as pd @@ -353,7 +354,7 @@ def load_excel_file(self, page, **partition): ) excel_file = pudl.helpers.convert_df_to_excel_file(df, index=False) else: - excel_file = pd.ExcelFile(zf.read(xlsx_filename)) + excel_file = pd.ExcelFile(BytesIO(zf.read(xlsx_filename))) finally: self._file_cache[xlsx_filename] = excel_file # TODO(rousik): this _file_cache could be replaced with @cache or @memoize annotations diff --git a/src/pudl/extract/ferc1.py b/src/pudl/extract/ferc1.py index 386c6e78a1..a7eb666c07 100644 --- a/src/pudl/extract/ferc1.py +++ b/src/pudl/extract/ferc1.py @@ -235,7 +235,7 @@ class Ferc1DbfExtractor(FercDbfExtractor): """Wrapper for running the foxpro to sqlite conversion of FERC1 dataset.""" DATASET = "ferc1" - DATABASE_NAME = "ferc1.sqlite" + DATABASE_NAME = "ferc1_dbf.sqlite" def get_settings( self, global_settings: FercToSqliteSettings @@ -450,7 +450,7 @@ def extract_dbf_generic( Args: table_names: The names of the raw dbf tables you want to combine. - io_manager: IO Manager that extracts tables from ferc1.sqlite as dataframes. + io_manager: IO Manager that reads tables out of ``ferc1_dbf.sqlite``. dataset_settings: object containing desired years to extract. Return: @@ -478,7 +478,7 @@ def extract_xbrl_generic( Args: table_names: The names of the raw xbrl tables you want to combine. - io_manager: IO Manager that extracts tables from ferc1.sqlite as dataframes. + io_manager: IO Manager that reads tables out of ``ferc1_xbrl.sqlite``. dataset_settings: object containing desired years to extract. period: Either duration or instant, specific to xbrl data.
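For downstream users, the practical effect of renaming ``ferc1.sqlite`` to ``ferc1_dbf.sqlite`` above (and the matching renames for FERC Forms 2, 6, and 60 below) is that any code which opened the raw DBF-derived database by its old name must now point at the new file. A minimal sketch, assuming only that the ``$PUDL_OUTPUT`` environment variable is set as described in the ``censusdp1tract_to_sqlite`` docstring above; it just lists whatever tables are present rather than assuming any particular one exists:

import os
from pathlib import Path

import sqlalchemy as sa

# Open the renamed raw FERC Form 1 (DBF-derived) SQLite database.
pudl_output = Path(os.environ["PUDL_OUTPUT"])
engine = sa.create_engine(f"sqlite:///{pudl_output / 'ferc1_dbf.sqlite'}")

# Enumerate the tables available in the database.
with engine.connect() as conn:
    tables = conn.execute(
        sa.text("SELECT name FROM sqlite_master WHERE type = 'table'")
    ).scalars().all()
print(tables)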
diff --git a/src/pudl/extract/ferc2.py b/src/pudl/extract/ferc2.py index ccdb338d22..baadd678e2 100644 --- a/src/pudl/extract/ferc2.py +++ b/src/pudl/extract/ferc2.py @@ -31,7 +31,7 @@ class Ferc2DbfExtractor(FercDbfExtractor): """Wrapper for running the foxpro to sqlite conversion of FERC 2 dataset.""" DATASET = "ferc2" - DATABASE_NAME = "ferc2.sqlite" + DATABASE_NAME = "ferc2_dbf.sqlite" def get_settings( self: Self, global_settings: FercToSqliteSettings diff --git a/src/pudl/extract/ferc6.py b/src/pudl/extract/ferc6.py index 28213d4830..f1fa81bda3 100644 --- a/src/pudl/extract/ferc6.py +++ b/src/pudl/extract/ferc6.py @@ -16,7 +16,7 @@ class Ferc6DbfExtractor(FercDbfExtractor): """Extracts FERC Form 6 data from the legacy DBF archives.""" DATASET = "ferc6" - DATABASE_NAME = "ferc6.sqlite" + DATABASE_NAME = "ferc6_dbf.sqlite" def get_settings( self, global_settings: FercToSqliteSettings diff --git a/src/pudl/extract/ferc60.py b/src/pudl/extract/ferc60.py index e3bbb5ecb4..910ca85aa5 100644 --- a/src/pudl/extract/ferc60.py +++ b/src/pudl/extract/ferc60.py @@ -16,7 +16,7 @@ class Ferc60DbfExtractor(FercDbfExtractor): """Extracts FERC Form 60 data from the legacy DBF archives.""" DATASET = "ferc60" - DATABASE_NAME = "ferc60.sqlite" + DATABASE_NAME = "ferc60_dbf.sqlite" def get_settings( self, global_settings: FercToSqliteSettings diff --git a/src/pudl/ferc_to_sqlite/__init__.py b/src/pudl/ferc_to_sqlite/__init__.py index cb08895605..d9dd5b48ef 100644 --- a/src/pudl/ferc_to_sqlite/__init__.py +++ b/src/pudl/ferc_to_sqlite/__init__.py @@ -51,7 +51,7 @@ def ferc_to_sqlite_xbrl_only(): config={ "resources": { "ferc_to_sqlite_settings": { - "config": ferc_to_sqlite_fast_settings.dict(), + "config": ferc_to_sqlite_fast_settings.model_dump(), }, }, }, diff --git a/src/pudl/ferc_to_sqlite/cli.py b/src/pudl/ferc_to_sqlite/cli.py index 7a60c8bc41..5754ee97df 100644 --- a/src/pudl/ferc_to_sqlite/cli.py +++ b/src/pudl/ferc_to_sqlite/cli.py @@ -145,7 +145,7 @@ def main(): # noqa: C901 run_config={ "resources": { "ferc_to_sqlite_settings": { - "config": etl_settings.ferc_to_sqlite_settings.dict() + "config": etl_settings.ferc_to_sqlite_settings.model_dump() }, "datastore": { "config": { diff --git a/src/pudl/glue/ferc1_eia.py b/src/pudl/glue/ferc1_eia.py index e75f30f55d..47a49b7a2f 100644 --- a/src/pudl/glue/ferc1_eia.py +++ b/src/pudl/glue/ferc1_eia.py @@ -148,7 +148,7 @@ def get_mapped_plants_eia(): ########################## # Raw Plants and Utilities ########################## -def get_util_ids_ferc1_raw_xbrl(ferc1_engine_xbrl: sa.engine.Engine) -> pd.DataFrame: +def get_util_ids_ferc1_raw_xbrl(ferc1_engine_xbrl: sa.Engine) -> pd.DataFrame: """Grab the utility ids (reported as `entity_id`) in the FERC1 XBRL database.""" all_utils_ferc1_xbrl = ( pd.read_sql( @@ -170,7 +170,7 @@ def get_util_ids_ferc1_raw_xbrl(ferc1_engine_xbrl: sa.engine.Engine) -> pd.DataF return all_utils_ferc1_xbrl -def get_util_ids_ferc1_raw_dbf(ferc1_engine_dbf: sa.engine.Engine) -> pd.DataFrame: +def get_util_ids_ferc1_raw_dbf(ferc1_engine_dbf: sa.Engine) -> pd.DataFrame: """Grab the utility ids (reported as `respondent_id`) in the FERC1 DBF database.""" all_utils_ferc1_dbf = ( pd.read_sql_table("f1_respondent_id", ferc1_engine_dbf) @@ -252,7 +252,7 @@ def drop_invalid_rows(self, df): def get_plants_ferc1_raw_job() -> JobDefinition: """Pull all plants in the FERC Form 1 DBF and XBRL DB for given years. - This job expects ferc1.sqlite and ferc_xbrl.sqlite databases to be populated.
+ This job expects ferc1_dbf.sqlite and ferc_xbrl.sqlite databases to be populated. """ plant_tables = [ "plants_hydro_ferc1", @@ -419,7 +419,7 @@ def get_utility_most_recent_capacity(pudl_engine) -> pd.DataFrame: gen_caps["utility_id_eia"] = gen_caps["utility_id_eia"].astype("Int64") most_recent_gens_idx = ( - gen_caps.groupby("utility_id_eia")["report_date"].transform(max) + gen_caps.groupby("utility_id_eia")["report_date"].transform("max") == gen_caps["report_date"] ) most_recent_gens = gen_caps.loc[most_recent_gens_idx] diff --git a/src/pudl/helpers.py b/src/pudl/helpers.py index 147a012755..e9f576db5a 100644 --- a/src/pudl/helpers.py +++ b/src/pudl/helpers.py @@ -78,7 +78,7 @@ def find_new_ferc1_strings( table: str, field: str, strdict: dict[str, list[str]], - ferc1_engine: sa.engine.Engine, + ferc1_engine: sa.Engine, ) -> set[str]: """Identify as-of-yet uncategorized freeform strings in FERC Form 1. @@ -1047,7 +1047,7 @@ def simplify_columns(df: pd.DataFrame) -> pd.DataFrame: return df -def drop_tables(engine: sa.engine.Engine, clobber: bool = False) -> None: +def drop_tables(engine: sa.Engine, clobber: bool = False) -> None: """Drops all tables from a SQLite database. Creates an sa.schema.MetaData object reflecting the structure of the @@ -1594,21 +1594,20 @@ def flatten_list(xs: Iterable) -> Generator: def convert_df_to_excel_file(df: pd.DataFrame, **kwargs) -> pd.ExcelFile: - """Converts a pandas dataframe to a pandas ExcelFile object. + """Convert a :class:`pandas.DataFrame` into a :class:`pandas.ExcelFile`. - You can pass parameters for pandas.to_excel() function. + Args: + df: The DataFrame to convert. + kwargs: Additional arguments to pass into :meth:`pandas.DataFrame.to_excel`. + + Returns: + The contents of the input DataFrame, represented as an ExcelFile. """ bio = BytesIO() - - writer = pd.ExcelWriter(bio, engine="xlsxwriter") - df.to_excel(writer, **kwargs) - - writer.close() - + with pd.ExcelWriter(bio, engine="xlsxwriter") as writer: + df.to_excel(writer, **kwargs) bio.seek(0) - workbook = bio.read() - - return pd.ExcelFile(workbook) + return pd.ExcelFile(bio) def get_asset_keys( diff --git a/src/pudl/io_managers.py b/src/pudl/io_managers.py index 2114584517..15e79eb2a9 100644 --- a/src/pudl/io_managers.py +++ b/src/pudl/io_managers.py @@ -134,7 +134,7 @@ def _get_table_name(self, context) -> str: table_name = context.get_identifier() return table_name - def _setup_database(self, timeout: float = 1_000.0) -> sa.engine.Engine: + def _setup_database(self, timeout: float = 1_000.0) -> sa.Engine: """Create database and metadata if they don't exist. Args: @@ -572,10 +572,7 @@ def __init__( """ super().__init__(base_dir, db_name, md, timeout) - def _setup_database( - self, - timeout: float = 1_000.0, - ) -> sa.engine.Engine: + def _setup_database(self, timeout: float = 1_000.0) -> sa.Engine: """Create database engine and read the metadata. Args: @@ -674,7 +671,7 @@ def ferc1_dbf_sqlite_io_manager(init_context) -> FercDBFSQLiteIOManager: """Create a SQLiteManager dagster resource for the ferc1 dbf database.""" return FercDBFSQLiteIOManager( base_dir=PudlPaths().output_dir, - db_name="ferc1", + db_name="ferc1_dbf", ) @@ -692,17 +689,19 @@ def filter_for_freshest_data( ) -> pd.DataFrame: """Get most updated values for each XBRL context. - An XBRL context includes an entity ID, the time period the data applies - to, and other dimensions such as utility type.
Each context has its own - ID, but they are frequently redefined with the same contents but - different IDs - so we identify them by their actual content. + An XBRL context includes an entity ID, the time period the data applies to, and + other dimensions such as utility type. Each context has its own ID, but they are + frequently redefined with the same contents but different IDs - so we identify + them by their actual content. - Each row in our SQLite database includes all the facts for one - context/filing pair. + Each row in our SQLite database includes all the facts for one context/filing + pair. - If one context is represented in multiple filings, we take the facts from the most recently-published filing. + If one context is represented in multiple filings, we take the facts from the + most recently-published filing. - This means that if a recently-published filing does not include a value for a fact that was previously reported, then that value will remain null. We do not + This means that if a recently-published filing does not include a value for a + fact that was previously reported, then that value will remain null. We do not forward-fill facts on a fact-by-fact basis. """ filing_metadata_cols = {"publication_time", "filing_name"} @@ -734,13 +733,13 @@ def filter_for_freshest_data( def refine_report_year(df: pd.DataFrame, xbrl_years: list[int]) -> pd.DataFrame: """Set a fact's report year by its actual dates. - Sometimes a fact belongs to a context which has no ReportYear - associated with it; other times there are multiple ReportYears - associated with a single filing. In these cases the report year of a - specific fact may be associated with the other years in the filing. + Sometimes a fact belongs to a context which has no ReportYear associated with + it; other times there are multiple ReportYears associated with a single filing. + In these cases the report year of a specific fact may be associated with the + other years in the filing. - In many cases we can infer the actual report year from the fact's - associated time period - either duration or instant. + In many cases we can infer the actual report year from the fact's associated + time period - either duration or instant. """ is_duration = len({"start_date", "end_date"} - set(df.columns)) == 0 is_instant = "date" in df.columns diff --git a/src/pudl/metadata/classes.py b/src/pudl/metadata/classes.py index a5e8f0be31..27f8655bdf 100644 --- a/src/pudl/metadata/classes.py +++ b/src/pudl/metadata/classes.py @@ -4,10 +4,11 @@ import json import re import sys +import warnings from collections.abc import Callable, Iterable from functools import lru_cache from pathlib import Path -from typing import Any, Literal +from typing import Annotated, Any, Literal, Self import jinja2 import pandas as pd @@ -15,7 +16,21 @@ import pydantic import sqlalchemy as sa from pandas._libs.missing import NAType -from pydantic.types import DirectoryPath +from pydantic import ( + AnyHttpUrl, + BaseModel, + ConfigDict, + DirectoryPath, + EmailStr, + StrictBool, + StrictFloat, + StrictInt, + StrictStr, + StringConstraints, + ValidationInfo, + field_validator, + model_validator, +) import pudl.logging_helpers from pudl.metadata.codes import CODE_METADATA @@ -47,6 +62,15 @@ logger = pudl.logging_helpers.get_logger(__name__) +# The BaseModel.schema attribute is deprecated and we are shadowing it to avoid needing +# to define an inconvenient alias for it. 
+warnings.filterwarnings( + action="ignore", + message='Field name "schema" shadows an attribute in parent "PudlMeta"', + category=UserWarning, + module="pydantic._internal._fields", +) + # ---- Helpers ---- # @@ -139,151 +163,36 @@ def _get_jinja_environment(template_dir: DirectoryPath = None): ) -# ---- Base ---- # - - -class Base(pydantic.BaseModel): - """Custom Pydantic base class. - - It overrides :meth:`fields` and :meth:`schema` to allow properties with those names. - To use them in a class, use an underscore prefix and an alias. - - Examples: - >>> class Class(Base): - ... fields_: list[str] = pydantic.Field(alias="fields") - >>> m = Class(fields=['x']) - >>> m - Class(fields=['x']) - >>> m.fields - ['x'] - >>> m.fields = ['y'] - >>> m.dict() - {'fields': ['y']} - """ - - class Config: - """Custom Pydantic configuration.""" - - validate_all: bool = True - validate_assignment: bool = True - extra: str = "forbid" - arbitrary_types_allowed = True - - def dict(self, *args, by_alias=True, **kwargs) -> dict: # noqa: A003 - """Return as a dictionary.""" - return super().dict(*args, by_alias=by_alias, **kwargs) - - def json(self, *args, by_alias=True, **kwargs) -> str: - """Return as JSON.""" - return super().json(*args, by_alias=by_alias, **kwargs) - - def __getattribute__(self, name: str) -> Any: - """Get attribute.""" - if name in ("fields", "schema") and f"{name}_" in self.__dict__: - name = f"{name}_" - return super().__getattribute__(name) - - def __setattr__(self, name, value) -> None: - """Set attribute.""" - if name in ("fields", "schema") and f"{name}_" in self.__dict__: - name = f"{name}_" - super().__setattr__(name, value) - - def __repr_args__(self) -> list[tuple[str, Any]]: - """Returns the attributes to show in __str__, __repr__, and __pretty__.""" - return [ - (a[:-1] if a in ("fields_", "schema_") else a, v) - for a, v in self.__dict__.items() - ] - - # ---- Class attribute types ---- # # NOTE: Using regex=r"^\S(.*\S)*$" to fail on whitespace is too slow -String = pydantic.constr(min_length=1, strict=True, regex=r"^\S+(\s+\S+)*$") +String = Annotated[ + str, StringConstraints(min_length=1, strict=True, pattern=r"^\S+(\s+\S+)*$") +] """Non-empty :class:`str` with no trailing or leading whitespace.""" -SnakeCase = pydantic.constr( - min_length=1, strict=True, regex=r"^[a-z_][a-z0-9_]*(_[a-z0-9]+)*$" -) +SnakeCase = Annotated[ + str, + StringConstraints( + min_length=1, strict=True, pattern=r"^[a-z_][a-z0-9_]*(_[a-z0-9]+)*$" + ), +] """Snake-case variable name :class:`str` (e.g. 
'pudl', 'entity_eia860').""" -Bool = pydantic.StrictBool -"""Any :class:`bool` (`True` or `False`).""" - -Float = pydantic.StrictFloat -"""Any :class:`float`.""" - -Int = pydantic.StrictInt -"""Any :class:`int`.""" - -PositiveInt = pydantic.conint(ge=0, strict=True) +PositiveInt = Annotated[int, pydantic.Field(ge=0, strict=True)] """Positive :class:`int`.""" -PositiveFloat = pydantic.confloat(ge=0, strict=True) +PositiveFloat = Annotated[float, pydantic.Field(ge=0, strict=True)] """Positive :class:`float`.""" -Email = pydantic.EmailStr -"""String representing an email.""" - -HttpUrl = pydantic.AnyHttpUrl -"""Http(s) URL.""" - - -class BaseType: - """Base class for custom pydantic types.""" - - @classmethod - def __get_validators__(cls) -> Callable: - """Yield validator methods.""" - yield cls.validate - - -class Date(BaseType): - """Any :class:`datetime.date`.""" - - @classmethod - def validate(cls, value: Any) -> datetime.date: - """Validate as date.""" - if not isinstance(value, datetime.date): - raise TypeError("value is not a date") - return value - - -class Datetime(BaseType): - """Any :class:`datetime.datetime`.""" - - @classmethod - def validate(cls, value: Any) -> datetime.datetime: - """Validate as datetime.""" - if not isinstance(value, datetime.datetime): - raise TypeError("value is not a datetime") - return value - -class Pattern(BaseType): - """Regular expression pattern.""" - - @classmethod - def validate(cls, value: Any) -> re.Pattern: - """Validate as pattern.""" - if not isinstance(value, str | re.Pattern): - raise TypeError("value is not a string or compiled regular expression") - if isinstance(value, str): - try: - value = re.compile(value) - except re.error: - raise ValueError("string is not a valid regular expression") - return value - - -def StrictList(item_type: type = Any) -> pydantic.ConstrainedList: # noqa: N802 +def StrictList(item_type: type = Any) -> type: # noqa: N802 """Non-empty :class:`list`. Allows :class:`list`, :class:`tuple`, :class:`set`, :class:`frozenset`, :class:`collections.deque`, or generators and casts to a :class:`list`. """ - return pydantic.conlist(item_type=item_type, min_items=1) + return Annotated[list[item_type], pydantic.Field(min_length=1)] # ---- Class attribute validators ---- # @@ -303,43 +212,60 @@ def _validator(*names, fn: Callable) -> Callable: Args: names: Names of attributes to validate. - fn: Validation function (see :meth:`pydantic.validator`). + fn: Validation function (see :meth:`pydantic.field_validator`). Examples: - >>> class Class(Base): + >>> class Class(BaseModel): ... x: list = None ... _check_unique = _validator("x", fn=_check_unique) - >>> Class(y=[0, 0]) + >>> Class(x=[0, 0]) Traceback (most recent call last): ValidationError: ... """ - return pydantic.validator(*names, allow_reuse=True)(fn) + return field_validator(*names)(fn) -# ---- Classes: Field ---- # +######################################################################################## +# PUDL Metadata Classes +######################################################################################## +class PudlMeta(BaseModel): + """A base model that configures some options for PUDL metadata classes.""" + + model_config = ConfigDict( + extra="forbid", + validate_default=True, + validate_assignment=True, + ) -class FieldConstraints(Base): +class FieldConstraints(PudlMeta): """Field constraints (`resource.schema.fields[...].constraints`). See https://specs.frictionlessdata.io/table-schema/#constraints. 
""" - required: Bool = False - unique: Bool = False - min_length: PositiveInt = None - max_length: PositiveInt = None - minimum: Int | Float | Date | Datetime = None - maximum: Int | Float | Date | Datetime = None - pattern: Pattern = None - # TODO: Replace with String (min_length=1) once "" removed from enums - enum: StrictList(pydantic.StrictStr | Int | Float | Bool | Date | Datetime) = None + required: StrictBool = False + unique: StrictBool = False + min_length: PositiveInt | None = None + max_length: PositiveInt | None = None + minimum: StrictInt | StrictFloat | datetime.date | datetime.datetime | None = None + maximum: StrictInt | StrictFloat | datetime.date | datetime.datetime | None = None + pattern: re.Pattern | None = None + enum: StrictList( + String + | StrictInt + | StrictFloat + | StrictBool + | datetime.date + | datetime.datetime + ) | None = None _check_unique = _validator("enum", fn=_check_unique) - @pydantic.validator("max_length") - def _check_max_length(cls, value, values): # noqa: N805 - minimum, maximum = values.get("min_length"), value + @field_validator("max_length") + @classmethod + def _check_max_length(cls, value, info: ValidationInfo): + minimum, maximum = info.data.get("min_length"), value if minimum is not None and maximum is not None: if type(minimum) is not type(maximum): raise ValueError("must be same type as min_length") @@ -347,9 +273,10 @@ def _check_max_length(cls, value, values): # noqa: N805 raise ValueError("must be greater or equal to min_length") return value - @pydantic.validator("maximum") - def _check_max(cls, value, values): # noqa: N805 - minimum, maximum = values.get("minimum"), value + @field_validator("maximum") + @classmethod + def _check_max(cls, value, info: ValidationInfo): + minimum, maximum = info.data.get("minimum"), value if minimum is not None and maximum is not None: if type(minimum) is not type(maximum): raise ValueError("must be same type as minimum") @@ -358,7 +285,7 @@ def _check_max(cls, value, values): # noqa: N805 return value -class FieldHarvest(Base): +class FieldHarvest(PudlMeta): """Field harvest parameters (`resource.schema.fields[...].harvest`).""" # NOTE: Callables with defaults must use pydantic.Field() to not bind to self @@ -371,7 +298,7 @@ class FieldHarvest(Base): """Fraction of invalid groups above which result is considered invalid.""" -class Encoder(Base): +class Encoder(PudlMeta): """A class that allows us to standardize reported categorical codes. Often the original data we are integrating uses short codes to indicate a @@ -416,14 +343,14 @@ class Encoder(Base): values. """ - ignored_codes: list[Int | str] = [] + ignored_codes: list[StrictInt | str] = [] """A list of non-standard codes which appear in the data, and will be set to NA. These codes may be the result of data entry errors, and we are unable to map them to the appropriate canonical code. They are discarded from the raw input data. """ - code_fixes: dict[Int | String, Int | String] = {} + code_fixes: dict[StrictInt | String, StrictInt | String] = {} """A dictionary mapping non-standard codes to canonical, standardized codes. The intended meanings of some non-standard codes are clear, and therefore they can @@ -431,11 +358,15 @@ class Encoder(Base): the result of data entry errors or changes in the stanard codes over time. 
""" - name: String = None + name: String | None = None """The name of the code.""" - @pydantic.validator("df") - def _df_is_encoding_table(cls, df): # noqa: N805 + # Required to allow DataFrame + model_config = ConfigDict(arbitrary_types_allowed=True) + + @field_validator("df") + @classmethod + def _df_is_encoding_table(cls, df: pd.DataFrame): """Verify that the coding table provides both codes and descriptions.""" errors = [] if "code" not in df.columns or "description" not in df.columns: @@ -449,52 +380,56 @@ def _df_is_encoding_table(cls, df): # noqa: N805 raise ValueError(format_errors(*errors, pydantic=True)) return df - @pydantic.validator("ignored_codes") - def _good_and_ignored_codes_are_disjoint(cls, ignored_codes, values): # noqa: N805 + @field_validator("ignored_codes") + @classmethod + def _good_and_ignored_codes_are_disjoint(cls, ignored_codes, info: ValidationInfo): """Check that there's no overlap between good and ignored codes.""" - if "df" not in values: + if "df" not in info.data: return ignored_codes errors = [] - overlap = set(values["df"]["code"]).intersection(ignored_codes) + overlap = set(info.data["df"]["code"]).intersection(ignored_codes) if overlap: errors.append(f"Overlap found between good and ignored codes: {overlap}.") if errors: raise ValueError(format_errors(*errors, pydantic=True)) return ignored_codes - @pydantic.validator("code_fixes") - def _good_and_fixable_codes_are_disjoint(cls, code_fixes, values): # noqa: N805 + @field_validator("code_fixes") + @classmethod + def _good_and_fixable_codes_are_disjoint(cls, code_fixes, info: ValidationInfo): """Check that there's no overlap between the good and fixable codes.""" - if "df" not in values: + if "df" not in info.data: return code_fixes errors = [] - overlap = set(values["df"]["code"]).intersection(code_fixes) + overlap = set(info.data["df"]["code"]).intersection(code_fixes) if overlap: errors.append(f"Overlap found between good and fixable codes: {overlap}") if errors: raise ValueError(format_errors(*errors, pydantic=True)) return code_fixes - @pydantic.validator("code_fixes") - def _fixable_and_ignored_codes_are_disjoint(cls, code_fixes, values): # noqa: N805 + @field_validator("code_fixes") + @classmethod + def _fixable_and_ignored_codes_are_disjoint(cls, code_fixes, info: ValidationInfo): """Check that there's no overlap between the ignored and fixable codes.""" - if "ignored_codes" not in values: + if "ignored_codes" not in info.data: return code_fixes errors = [] - overlap = set(code_fixes).intersection(values["ignored_codes"]) + overlap = set(code_fixes).intersection(info.data["ignored_codes"]) if overlap: errors.append(f"Overlap found between fixable and ignored codes: {overlap}") if errors: raise ValueError(format_errors(*errors, pydantic=True)) return code_fixes - @pydantic.validator("code_fixes") - def _check_fixed_codes_are_good_codes(cls, code_fixes, values): # noqa: N805 + @field_validator("code_fixes") + @classmethod + def _check_fixed_codes_are_good_codes(cls, code_fixes, info: ValidationInfo): """Check that every every fixed code is also one of the good codes.""" - if "df" not in values: + if "df" not in info.data: return code_fixes errors = [] - bad_codes = set(code_fixes.values()).difference(values["df"]["code"]) + bad_codes = set(code_fixes.values()).difference(info.data["df"]["code"]) if bad_codes: errors.append( f"Some fixed codes aren't in the list of good codes: {bad_codes}" @@ -545,7 +480,7 @@ def from_code_id(cls, x: str) -> "Encoder": return cls(**copy.deepcopy(CODE_METADATA[x]), 
name=x) def to_rst( - self, top_dir: DirectoryPath, csv_subdir: DirectoryPath, is_header: Bool + self, top_dir: DirectoryPath, csv_subdir: DirectoryPath, is_header: StrictBool ) -> String: """Output dataframe to a csv for use in jinja template. @@ -564,7 +499,7 @@ def to_rst( return rendered -class Field(Base): +class Field(PudlMeta): """Field (`resource.schema.fields[...]`). See https://specs.frictionlessdata.io/table-schema/#field-descriptors. @@ -581,6 +516,7 @@ class Field(Base): """ name: SnakeCase + # Shadows built-in type. type: Literal[ # noqa: A003 "string", "number", @@ -590,19 +526,21 @@ class Field(Base): "datetime", "year", ] - title: String = None - format: Literal["default"] = "default" # noqa: A003 - description: String = None - unit: String = None - constraints: FieldConstraints = {} - harvest: FieldHarvest = {} - encoder: Encoder = None - - @pydantic.validator("constraints") - def _check_constraints(cls, value, values): # noqa: N805, C901 - if "type" not in values: + title: String | None = None + # Alias required to avoid shadowing Python built-in format() + format_: Literal["default"] = pydantic.Field(alias="format", default="default") + description: String | None = None + unit: String | None = None + constraints: FieldConstraints = FieldConstraints() + harvest: FieldHarvest = FieldHarvest() + encoder: Encoder | None = None + + @field_validator("constraints") + @classmethod + def _check_constraints(cls, value, info: ValidationInfo): # noqa: C901 + if "type" not in info.data: return value - dtype = values["type"] + dtype = info.data["type"] errors = [] for key in ("min_length", "max_length", "pattern"): if getattr(value, key) is not None and dtype != "string": @@ -622,12 +560,13 @@ def _check_constraints(cls, value, values): # noqa: N805, C901 raise ValueError(format_errors(*errors, pydantic=True)) return value - @pydantic.validator("encoder") - def _check_encoder(cls, value, values): # noqa: N805 - if "type" not in values or value is None: + @field_validator("encoder") + @classmethod + def _check_encoder(cls, value, info: ValidationInfo): + if "type" not in info.data or value is None: return value errors = [] - dtype = values["type"] + dtype = info.data["type"] if dtype not in ["string", "integer"]: errors.append( "Encoding only supported for string and integer fields, found " @@ -749,32 +688,33 @@ def encode(self, col: pd.Series, dtype: type | None = None) -> pd.Series: # ---- Classes: Resource ---- # -class ForeignKeyReference(Base): +class ForeignKeyReference(PudlMeta): """Foreign key reference (`resource.schema.foreign_keys[...].reference`). See https://specs.frictionlessdata.io/table-schema/#foreign-keys. """ resource: SnakeCase - fields_: StrictList(SnakeCase) = pydantic.Field(alias="fields") + fields: StrictList(SnakeCase) - _check_unique = _validator("fields_", fn=_check_unique) + _check_unique = _validator("fields", fn=_check_unique) -class ForeignKey(Base): +class ForeignKey(PudlMeta): """Foreign key (`resource.schema.foreign_keys[...]`). See https://specs.frictionlessdata.io/table-schema/#foreign-keys.
""" - fields_: StrictList(SnakeCase) = pydantic.Field(alias="fields") + fields: StrictList(SnakeCase) reference: ForeignKeyReference - _check_unique = _validator("fields_", fn=_check_unique) + _check_unique = _validator("fields", fn=_check_unique) - @pydantic.validator("reference") - def _check_fields_equal_length(cls, value, values): # noqa: N805 - if "fields_" in values and len(value.fields) != len(values["fields_"]): + @field_validator("reference") + @classmethod + def _check_fields_equal_length(cls, value, info: ValidationInfo): + if "fields" in info.data and len(value.fields) != len(info.data["fields"]): raise ValueError("fields and reference.fields are not equal length") return value @@ -790,53 +730,60 @@ def to_sql(self) -> sa.ForeignKeyConstraint: ) -class Schema(Base): +class Schema(PudlMeta): """Table schema (`resource.schema`). See https://specs.frictionlessdata.io/table-schema. """ - fields_: StrictList(Field) = pydantic.Field(alias="fields") - missing_values: list[pydantic.StrictStr] = [""] - primary_key: StrictList(SnakeCase) = None + fields: StrictList(Field) + missing_values: list[StrictStr] = [""] + primary_key: StrictList(SnakeCase) | None = None foreign_keys: list[ForeignKey] = [] _check_unique = _validator( "missing_values", "primary_key", "foreign_keys", fn=_check_unique ) - @pydantic.validator("fields_") - def _check_field_names_unique(cls, value): # noqa: N805 - _check_unique([f.name for f in value]) - return value + @field_validator("fields") + @classmethod + def _check_field_names_unique(cls, fields: list[Field]): + _check_unique([f.name for f in fields]) + return fields - @pydantic.validator("primary_key") - def _check_primary_key_in_fields(cls, value, values): # noqa: N805 - if value is not None and "fields_" in values: + @field_validator("primary_key") + @classmethod + def _check_primary_key_in_fields(cls, pk, info: ValidationInfo): + """Verify that all primary key elements also appear in the schema fields.""" + if pk is not None and "fields" in info.data: missing = [] - names = [f.name for f in values["fields_"]] - for name in value: + names = [f.name for f in info.data["fields"]] + for name in pk: if name in names: # Flag primary key fields as required - field = values["fields_"][names.index(name)] + field = info.data["fields"][names.index(name)] field.constraints.required = True else: missing.append(field.name) if missing: raise ValueError(f"names {missing} missing from fields") - return value - - @pydantic.validator("foreign_keys", each_item=True) - def _check_foreign_key_in_fields(cls, value, values): # noqa: N805 - if value and "fields_" in values: - names = [f.name for f in values["fields_"]] - missing = [x for x in value.fields if x not in names] - if missing: - raise ValueError(f"names {missing} missing from fields") - return value + return pk + + @model_validator(mode="after") + def _check_foreign_key_in_fields(self: Self): + """Verify that all foreign key elements also appear in the schema fields.""" + if self.foreign_keys: + schema_field_names = [field.name for field in self.fields] + for fk in self.foreign_keys: + missing_field_names = set(fk.fields).difference(schema_field_names) + if missing_field_names: + raise ValueError( + f"Foreign key fields {missing_field_names} not found in schema." + ) + return self -class License(Base): +class License(PudlMeta): """Data license (`package|resource.licenses[...]`). See https://specs.frictionlessdata.io/data-package/#licenses. 
@@ -844,7 +791,7 @@ class License(Base): name: String title: String - path: HttpUrl + path: AnyHttpUrl @staticmethod def dict_from_id(x: str) -> dict: @@ -857,15 +804,15 @@ def from_id(cls, x: str) -> "License": return cls(**cls.dict_from_id(x)) -class Contributor(Base): +class Contributor(PudlMeta): """Data contributor (`package.contributors[...]`). See https://specs.frictionlessdata.io/data-package/#contributors. """ title: String - path: HttpUrl = None - email: Email = None + path: AnyHttpUrl | None = None + email: EmailStr | None = None role: Literal[ "author", "contributor", "maintainer", "publisher", "wrangler" ] = "contributor" @@ -890,8 +837,8 @@ class Contributor(Base): "supervisor", "work package leader", ] = "project member" - organization: String = None - orcid: String = None + organization: String | None = None + orcid: String | None = None @staticmethod def dict_from_id(x: str) -> dict: @@ -911,7 +858,7 @@ def __hash__(self): return hash(str(self)) -class DataSource(Base): +class DataSource(PudlMeta): """A data source that has been integrated into PUDL. This metadata is used for: @@ -927,19 +874,19 @@ class DataSource(Base): """ name: SnakeCase - title: String = None - description: String = None - field_namespace: String = None + title: String | None = None + description: String | None = None + field_namespace: String | None = None keywords: list[str] = [] - path: HttpUrl = None + path: AnyHttpUrl | None = None contributors: list[Contributor] = [] license_raw: License license_pudl: License - concept_doi: ZenodoDoi = None + concept_doi: ZenodoDoi | None = None working_partitions: dict[SnakeCase, Any] = {} source_file_dict: dict[SnakeCase, Any] = {} # agency: Agency # needs to be defined - email: Email = None + email: EmailStr | None = None def get_resource_ids(self) -> list[str]: """Compile list of resource IDs associated with this data source.""" @@ -1026,7 +973,6 @@ def dict_from_id(x: str) -> dict: """Look up the source by source name in the metadata.""" # If ID ends with _xbrl strip end to find data source lookup_id = x.replace("_xbrl", "") - return {"name": x, **copy.deepcopy(SOURCES[lookup_id])} @classmethod @@ -1035,10 +981,10 @@ def from_id(cls, x: str) -> "DataSource": return cls(**cls.dict_from_id(x)) -class ResourceHarvest(Base): +class ResourceHarvest(PudlMeta): """Resource harvest parameters (`resource.harvest`).""" - harvest: Bool = False + harvest: StrictBool = False """Whether to harvest from dataframes based on field names. If `False`, the dataframe with the same name is used and the process is limited to @@ -1049,7 +995,7 @@ class ResourceHarvest(Base): """Fraction of invalid fields above which result is considerd invalid.""" -class Resource(Base): +class Resource(PudlMeta): """Tabular data resource (`package.resources[...]`). See https://specs.frictionlessdata.io/tabular-data-resource. 
@@ -1172,20 +1118,21 @@ class Resource(Base): """ name: SnakeCase - title: String = None - description: String = None - harvest: ResourceHarvest = {} - schema_: Schema = pydantic.Field(alias="schema") - format_: String = pydantic.Field(alias="format", default=None) - mediatype: String = None - path: String = None - dialect: dict[str, str] = None + title: String | None = None + description: String | None = None + harvest: ResourceHarvest = ResourceHarvest() + schema: Schema + # Alias required to avoid shadowing Python built-in format() + format_: String | None = pydantic.Field(alias="format", default=None) + mediatype: String | None = None + path: String | None = None + dialect: dict[str, str] | None = None profile: String = "tabular-data-resource" contributors: list[Contributor] = [] licenses: list[License] = [] sources: list[DataSource] = [] keywords: list[String] = [] - encoder: Encoder = None + encoder: Encoder | None = None field_namespace: Literal[ "eia", "epacems", @@ -1195,7 +1142,7 @@ class Resource(Base): "pudl", "ppe", "eia_bulk_elec", - ] = None + ] | None = None etl_group: Literal[ "eia860", "eia861", @@ -1215,16 +1162,17 @@ class Resource(Base): "state_demand", "static_pudl", "service_territories", - ] = None + ] | None = None create_database_schema: bool = True _check_unique = _validator( "contributors", "keywords", "licenses", "sources", fn=_check_unique ) - @pydantic.validator("schema_") - def _check_harvest_primary_key(cls, value, values): # noqa: N805 - if values["harvest"].harvest and not value.primary_key: + @field_validator("schema") + @classmethod + def _check_harvest_primary_key(cls, value, info: ValidationInfo): + if info.data["harvest"].harvest and not value.primary_key: raise ValueError("Harvesting requires a primary key") return value @@ -1713,7 +1661,7 @@ def encode(self, df: pd.DataFrame) -> pd.DataFrame: # ---- Package ---- # -class Package(Base): +class Package(PudlMeta): """Tabular data package. See https://specs.frictionlessdata.io/data-package. 
@@ -1741,29 +1689,31 @@ class Package(Base): """ name: String - title: String = None - description: String = None + title: String | None = None + description: String | None = None keywords: list[String] = [] - homepage: HttpUrl = "https://catalyst.coop/pudl" - created: Datetime = datetime.datetime.utcnow() + homepage: AnyHttpUrl = AnyHttpUrl("https://catalyst.coop/pudl") + created: datetime.datetime = datetime.datetime.utcnow() contributors: list[Contributor] = [] sources: list[DataSource] = [] licenses: list[License] = [] resources: StrictList(Resource) profile: String = "tabular-data-package" + model_config = ConfigDict(validate_assignment=False) - @pydantic.validator("resources") - def _check_foreign_keys(cls, value): # noqa: N805 - rnames = [resource.name for resource in value] + @field_validator("resources") + @classmethod + def _check_foreign_keys(cls, resources: list[Resource]): + rnames = [resource.name for resource in resources] errors = [] - for resource in value: + for resource in resources: for foreign_key in resource.schema.foreign_keys: rname = foreign_key.reference.resource tag = f"[{resource.name} -> {rname}]" if rname not in rnames: errors.append(f"{tag}: Reference not found") continue - reference = value[rnames.index(rname)] + reference = resources[rnames.index(rname)] if not reference.schema.primary_key: errors.append(f"{tag}: Reference missing primary key") continue @@ -1778,15 +1728,23 @@ def _check_foreign_keys(cls, value): # noqa: N805 raise ValueError( format_errors(*errors, title="Foreign keys", pydantic=True) ) - return value + return resources + + @model_validator(mode="after") + def _populate_from_resources(self: Self): + """Populate Package attributes from similar deduplicated Resource attributes. - @pydantic.root_validator(skip_on_failure=True) - def _populate_from_resources(cls, values): # noqa: N805 + Resources and Packages share some descriptive attributes. When building a + Package out of a collection of Resources, we want the Package to reflect the + union of all the analogous values found in the Resources, but we don't want + any duplicates. We may also get values directly from the Package inputs. + """ for key in ("keywords", "contributors", "sources", "licenses"): - values[key] = _unique( - values[key], *[getattr(r, key) for r in values["resources"]] - ) - return values + package_value = getattr(self, key) + resource_values = [getattr(resource, key) for resource in self.resources] + deduped_values = _unique(package_value, *resource_values) + setattr(self, key, deduped_values) + return self @classmethod @lru_cache @@ -1899,7 +1857,7 @@ def to_sql( return metadata -class CodeMetadata(Base): +class CodeMetadata(PudlMeta): """A list of Encoders for standardizing and documenting categorical codes. Used to export static coding metadata to PUDL documentation automatically @@ -1934,7 +1892,7 @@ def to_rst( f.write(rendered) -class DatasetteMetadata(Base): +class DatasetteMetadata(PudlMeta): """A collection of Data Sources and Resources for metadata export. Used to create metadata YAML file to accompany Datasette. 
@@ -1956,22 +1914,26 @@ class DatasetteMetadata(Base): def from_data_source_ids( cls, output_path: Path, - data_source_ids: Iterable[str] = [ + data_source_ids: list[str] = [ "pudl", - "ferc1", "eia860", - "eia861", "eia860m", + "eia861", "eia923", + "ferc1", + "ferc2", + "ferc6", + "ferc60", + "ferc714", ], - xbrl_ids: Iterable[str] = [ + xbrl_ids: list[str] = [ "ferc1_xbrl", "ferc2_xbrl", "ferc6_xbrl", "ferc60_xbrl", "ferc714_xbrl", ], - extra_etl_groups: Iterable[str] = [ + extra_etl_groups: list[str] = [ "entity_eia", "glue", "static_eia", @@ -1989,9 +1951,7 @@ def from_data_source_ids( extra_etl_groups: ETL groups with resources that should be included """ # Compile a list of DataSource objects for use in the template - data_sources = [ - DataSource.from_id(ds_id) for ds_id in data_source_ids + xbrl_ids - ] + data_sources = [DataSource.from_id(ds_id) for ds_id in data_source_ids] # Instantiate all possible resources in a Package: pkg = Package.from_resource_ids() diff --git a/src/pudl/metadata/constants.py b/src/pudl/metadata/constants.py index 95bffa6b56..5121b8605d 100644 --- a/src/pudl/metadata/constants.py +++ b/src/pudl/metadata/constants.py @@ -8,12 +8,12 @@ from sqlalchemy.dialects.sqlite import DATETIME as SQLITE_DATETIME FIELD_DTYPES_PANDAS: dict[str, str] = { - "string": "string", - "number": "float64", - "integer": "Int64", "boolean": "boolean", "date": "datetime64[s]", "datetime": "datetime64[s]", + "integer": "Int64", + "number": "float64", + "string": "string", "year": "datetime64[s]", } """Pandas data type by PUDL field type (Data Package `field.type`).""" @@ -43,13 +43,13 @@ """SQLAlchemy column types by PUDL field type (Data Package `field.type`).""" CONSTRAINT_DTYPES: dict[str, type] = { - "string": str, - "integer": int, - "year": int, - "number": float, "boolean": bool, "date": datetime.date, "datetime": datetime.datetime, + "integer": int, + "number": float, + "string": str, + "year": int, } """Python types for field constraints by PUDL field type (Data Package `field.type`).""" diff --git a/src/pudl/metadata/fields.py b/src/pudl/metadata/fields.py index 52cd91cd2c..313c585178 100644 --- a/src/pudl/metadata/fields.py +++ b/src/pudl/metadata/fields.py @@ -665,7 +665,17 @@ "type": "number", "description": "Boiler efficiency percentage when burning at 50 percent load to the nearest 0.1 percent.", }, - "eia_code": {"type": "integer"}, + "eia_code": { + "type": "integer", + "description": ( + "EIA utility or balancing area authority ID associated with this FERC Form " + "714 respondent. Note that many utilities are also balancing authorities " + "and in many cases EIA uses the same integer ID to identify a utility in " + "its role as a balancing authority AND as a utility, but there is no " + "requirement that these IDs be the same, and in a number of cases they are " + "different." + ), + }, "electric_plant": { "type": "number", "description": "Electric Plant In Service (USD).", @@ -2262,8 +2272,14 @@ ) }, }, - "respondent_id_ferc714": {"type": "integer"}, - "respondent_name_ferc714": {"type": "string"}, + "respondent_id_ferc714": { + "type": "integer", + "description": "FERC Form 714 respondent ID. 
Note that this ID does not correspond to FERC respondent IDs from other forms.", + }, + "respondent_name_ferc714": { + "type": "string", + "description": "Name of the utility, balancing area authority, or planning authority responding to FERC Form 714.", + }, "respondent_type": { "type": "string", "constraints": {"enum": ["utility", "balancing_authority"]}, diff --git a/src/pudl/metadata/helpers.py b/src/pudl/metadata/helpers.py index 790a07a11c..23b2003fa7 100644 --- a/src/pudl/metadata/helpers.py +++ b/src/pudl/metadata/helpers.py @@ -581,7 +581,7 @@ def groupby_aggregate( # noqa: C901 result = df[by].drop_duplicates().set_index(by) if not raised: # Move errors to report and replace errors with nulls - is_error = result.applymap(lambda x: isinstance(x, AggregationError)) + is_error = result.map(lambda x: isinstance(x, AggregationError)) for col in data_columns: report = result[col][is_error[col]] if not report.empty: diff --git a/src/pudl/metadata/resources/eia860.py b/src/pudl/metadata/resources/eia860.py index 22ec9424c6..cdab8d5539 100644 --- a/src/pudl/metadata/resources/eia860.py +++ b/src/pudl/metadata/resources/eia860.py @@ -501,8 +501,8 @@ "description": ( """The cost, type, operating status, retirement date, and install year of emissions control equipment reported to EIA. Includes control ids for sulfur dioxide -(SO2), particulate matter, mercury, nitrogen oxide (NOX), and acid (HCl) gas monitoring. -""" +(SO2), particulate matter, mercury, nitrogen oxide (NOX), and acid (HCl) gas +monitoring.""" ), "schema": { "fields": [ @@ -533,8 +533,7 @@ emissions control equipment reported to EIA. Includes control ids for sulfur dioxide (SO2), particulate matter, mercury, nitrogen oxide (NOX), and acid (HCl) gas monitoring. The denormalized version contains plant name, utility id, pudl id, and utility name -columns. -""" +columns.""" ), "schema": { "fields": [ @@ -569,8 +568,7 @@ "description": ( """A table that links EIA boiler IDs to emissions control IDs for NOx, SO2, mercury, and particulate monitoring. The relationship between the IDs is sometimes many -to many. -""" +to many.""" ), "schema": { "fields": [ @@ -617,8 +615,7 @@ "boiler_stack_flue_assn_eia860": { "description": ( """A table that links EIA boiler IDs to EIA stack and/or flue -system IDs. -""" +system IDs.""" ), "schema": { "fields": [ diff --git a/src/pudl/metadata/resources/eia923.py b/src/pudl/metadata/resources/eia923.py index f79bd60360..5de5724284 100644 --- a/src/pudl/metadata/resources/eia923.py +++ b/src/pudl/metadata/resources/eia923.py @@ -18,8 +18,7 @@ complex. Note that a small number of respondents only report annual fuel consumption, and all of -it is reported in December. -""" +it is reported in December.""" ), "fuel_receipts_costs_eia923": ( """Data describing fuel deliveries to power plants, reported in EIA-923 Schedule 2, Part A. @@ -44,8 +43,7 @@ Northeastern US reports essentially no fine-grained data about its natural gas prices. Additional data which we haven't yet integrated is available in a similar format from -2002-2008 via the EIA-423, and going back as far as 1972 from the FERC-423. -""" +2002-2008 via the EIA-423, and going back as far as 1972 from the FERC-423.""" ), "generation_eia923": ( """EIA-923 Monthly Generating Unit Net Generation. From EIA-923 Schedule 3. @@ -62,8 +60,7 @@ incomplete boiler-generator associations. Note that a small number of respondents only report annual net generation, and all of -it is reported in December. 
-""" +it is reported in December.""" ), "generation_fuel_eia923": ( """EIA-923 Monthly Generation and Fuel Consumption Time Series. From EIA-923 Schedule 3. @@ -83,8 +80,7 @@ generation. Note that a small number of respondents only report annual fuel consumption and net -generation, and all of it is reported in December. -""" +generation, and all of it is reported in December.""" ), "generation_fuel_nuclear_eia923": ( """EIA-923 Monthly Generation and Fuel Consumption Time Series. From EIA-923 Schedule 3. @@ -93,8 +89,7 @@ fuel and prime mover within a nuclear generation unit. This data is originally reported alongside similar information for fossil fuel plants, but the nuclear data is reported by (nuclear) generation unit rather than fuel type and prime mover, and so has a -different primary key. -""" +different primary key.""" ), "generation_fuel_combined_eia923": ( """EIA-923 Monthly Generation and Fuel Consumption Time Series. From EIA-923 Schedule 3. @@ -102,8 +97,7 @@ Denormalized, combined data from the ``generation_fuel_eia923`` and ``generation_fuel_nuclear_eia923`` with nuclear generation aggregated from the nuclear generation unit level up to the plant prime mover level, so as to be compatible with -fossil fuel generation data. -""" +fossil fuel generation data.""" ), } @@ -256,8 +250,7 @@ We have not yet taken the time to rigorously clean this data, but it could be linked with both Mining Safety and Health Administration (MSHA) and USGS data to provide more insight into where coal is coming from, and what the employment and geological context -is for those supplies. -""" +is for those supplies.""" ), "schema": { "fields": [ diff --git a/src/pudl/metadata/resources/ferc1.py b/src/pudl/metadata/resources/ferc1.py index d2ea910af5..b6b95a72b6 100644 --- a/src/pudl/metadata/resources/ferc1.py +++ b/src/pudl/metadata/resources/ferc1.py @@ -10,6 +10,7 @@ "fields": [ "utility_id_ferc1", "report_year", + "utility_type", "record_id", "asset_type", "ending_balance", @@ -41,6 +42,7 @@ "balance", "ferc_account", "row_type_xbrl", + "utility_type", ], "primary_key": [ "utility_id_ferc1", @@ -410,6 +412,7 @@ "ending_balance", "record_id", "utility_type", + "plant_status", ], "primary_key": ["utility_id_ferc1", "report_year", "ferc_account_label"], }, @@ -787,6 +790,7 @@ "balance", "ferc_account", "row_type_xbrl", + "utility_type", ], "primary_key": [ "utility_id_ferc1", @@ -869,6 +873,7 @@ "ferc_account", "row_type_xbrl", "starting_balance", + "utility_type", ], "primary_key": [ "utility_id_ferc1", @@ -895,6 +900,7 @@ "balance", "ferc_account", "row_type_xbrl", + "utility_type", ], "primary_key": [ "utility_id_ferc1", @@ -1196,6 +1202,7 @@ "balance", "ferc_account", "row_type_xbrl", + "utility_type", ], "primary_key": [ "utility_id_ferc1", @@ -1549,6 +1556,7 @@ "utility_id_pudl", "utility_name_ferc1", "utility_type", + "plant_status", "record_id", "additions", "adjustments", diff --git a/src/pudl/metadata/resources/ferc1_eia_record_linkage.py b/src/pudl/metadata/resources/ferc1_eia_record_linkage.py index e6cb93a781..e1a5f89032 100644 --- a/src/pudl/metadata/resources/ferc1_eia_record_linkage.py +++ b/src/pudl/metadata/resources/ferc1_eia_record_linkage.py @@ -27,8 +27,7 @@ and the total records are labeled as "total". This table includes A LOT of duplicative information about EIA plants. It is primarily -meant for use as an input into the record linkage between FERC1 plants and EIA. 
-""", +meant for use as an input into the record linkage between FERC1 plants and EIA.""", "schema": { "fields": [ "record_id_eia", @@ -137,8 +136,7 @@ The EIA data associated with each FERC plant record comes from our Plant Parts EIA table. The EIA data in each record represents an aggregation of several slices of an EIA -plant, across both physical characteristics and utility ownership. -""", +plant, across both physical characteristics and utility ownership.""", "schema": { "fields": [ "record_id_ferc1", diff --git a/src/pudl/metadata/resources/glue.py b/src/pudl/metadata/resources/glue.py index fcfe802a8e..3a4898baf6 100644 --- a/src/pudl/metadata/resources/glue.py +++ b/src/pudl/metadata/resources/glue.py @@ -18,8 +18,7 @@ Our version of the crosswalk clarifies some of the column names and removes unmatched rows. The :func:`pudl.etl.glue_assets.epacamd_eia` function doc strings explain -what changes are made from the EPA's version. -""", +what changes are made from the EPA's version.""", "schema": { "fields": [ "report_year", @@ -71,8 +70,7 @@ This table does not have primary keys because the primary keys would have been: plant_id_eia, generator_id, subplant_id and emissions_unit_id_epa, but there are some null records in the generator_id column because ~2 percent of all EPA CAMD records are not -successfully mapped to EIA generators. -""", +successfully mapped to EIA generators.""", "schema": { "fields": [ "plant_id_eia", diff --git a/src/pudl/metadata/templates/datasette-metadata.yml.jinja b/src/pudl/metadata/templates/datasette-metadata.yml.jinja index 8872e6ff7b..38466bb359 100644 --- a/src/pudl/metadata/templates/datasette-metadata.yml.jinja +++ b/src/pudl/metadata/templates/datasette-metadata.yml.jinja @@ -76,7 +76,8 @@ databases: {%- endif %} {%- endfor %} {%- endfor %} - ferc1: + + ferc1_dbf: {%- for ferc_ds in data_sources if ferc_ds.name == 'ferc1' %} source: Raw FERC Form 1 DBF Files source_url: https://doi.org/10.5281/zenodo.4127043 @@ -115,6 +116,7 @@ databases: tables: f1_respondent_id: label_column: respondent_name + ferc1_xbrl: {%- for ferc1_xbrl_ds in data_sources if ferc1_xbrl_ds.name == 'ferc1_xbrl' %} source: Raw FERC Form 1 XBRL Files @@ -167,6 +169,45 @@ databases: {%- endif %} {%- endfor %} {%- endfor %} + + ferc2_dbf: + {%- for ferc_ds in data_sources if ferc_ds.name == 'ferc2' %} + source: Raw FERC Form 2 DBF Files + source_url: https://doi.org/10.5281/zenodo.8326697 + about: {{ ferc_ds.title }} + about_url: {{ ferc_ds.path }} + license: {{ ferc_ds.license_pudl.name }} + license_url: {{ ferc_ds.license_pudl.path }} + description_html: | +
+        <p>{{ ferc_ds.description }}</p>
+
+        <p>This database is a concatenation of all the individual annual
+        {{ ferc_ds.title }} Visual FoxPro databases published from
+        {{ ferc_ds.get_temporal_coverage() }}. It has been minimally altered to allow
+        all years of data to coexist in the same database. It contains a wealth of
+        data about the operating costs and financial structures of US natural gas
+        companies, much of which is not publicly available from any other source.
+        It was produced by Catalyst Cooperative as part of the Public Utility Data
+        Liberation Project.</p>
+
+        <p>Caution:</p>
+ {%- endfor %} + tables: + f2_respondent_id: + label_column: respondent_name + ferc2_xbrl: {%- for ferc2_xbrl_ds in data_sources if ferc2_xbrl_ds.name == 'ferc2_xbrl' %} source: Raw FERC Form 2 XBRL Files @@ -219,6 +260,45 @@ databases: {%- endif %} {%- endfor %} {%- endfor %} + + ferc6_dbf: + {%- for ferc_ds in data_sources if ferc_ds.name == 'ferc6' %} + source: Raw FERC Form 6 DBF Files + source_url: https://doi.org/10.5281/zenodo.8326696 + about: {{ ferc_ds.title }} + about_url: {{ ferc_ds.path }} + license: {{ ferc_ds.license_pudl.name }} + license_url: {{ ferc_ds.license_pudl.path }} + description_html: | +
+        <p>{{ ferc_ds.description }}</p>
+
+        <p>This database is a concatenation of all the individual annual
+        {{ ferc_ds.title }} Visual FoxPro databases published from
+        {{ ferc_ds.get_temporal_coverage() }}. It has been minimally altered to allow
+        all years of data to coexist in the same database. It contains a wealth of
+        data about the operating costs and financial structures of US oil pipeline
+        companies, much of which is not publicly available from any other source.
+        It was produced by Catalyst Cooperative as part of the Public Utility Data
+        Liberation Project.</p>
+
+        <p>Caution:</p>
+ {%- endfor %} + tables: + f6_respondent_id: + label_column: respondent_name + ferc6_xbrl: {%- for ferc6_xbrl_ds in data_sources if ferc6_xbrl_ds.name == 'ferc6_xbrl' %} source: Raw FERC Form 6 XBRL Files @@ -271,6 +351,45 @@ databases: {%- endif %} {%- endfor %} {%- endfor %} + + ferc60_dbf: + {%- for ferc_ds in data_sources if ferc_ds.name == 'ferc60' %} + source: Raw FERC Form 60 DBF Files + source_url: https://doi.org/10.5281/zenodo.8326695 + about: {{ ferc_ds.title }} + about_url: {{ ferc_ds.path }} + license: {{ ferc_ds.license_pudl.name }} + license_url: {{ ferc_ds.license_pudl.path }} + description_html: | +
+        <p>{{ ferc_ds.description }}</p>
+
+        <p>This database is a concatenation of all the individual annual
+        {{ ferc_ds.title }} Visual FoxPro databases published from
+        {{ ferc_ds.get_temporal_coverage() }}. It has been minimally altered to allow
+        all years of data to coexist in the same database. It contains a wealth of
+        data about the operating costs and financial structures of US centralized
+        service companies, much of which is not publicly available from any other
+        source. It was produced by Catalyst Cooperative as part of the Public
+        Utility Data Liberation Project.</p>
+
+        <p>Caution:</p>
+ {%- endfor %} + tables: + f6_respondent_id: + label_column: respondent_name + ferc60_xbrl: {%- for ferc60_xbrl_ds in data_sources if ferc60_xbrl_ds.name == 'ferc60_xbrl' %} source: Raw FERC Form 60 XBRL Files @@ -323,6 +442,7 @@ databases: {%- endif %} {%- endfor %} {%- endfor %} + ferc714_xbrl: {%- for ferc714_xbrl_ds in data_sources if ferc714_xbrl_ds.name == 'ferc714_xbrl' %} source: Raw FERC Form 714 XBRL Files diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py index 5ed3551f3e..5f95bb600b 100644 --- a/src/pudl/output/ferc1.py +++ b/src/pudl/output/ferc1.py @@ -12,7 +12,13 @@ from matplotlib import pyplot as plt from networkx.drawing.nx_agraph import graphviz_layout from pandas._libs.missing import NAType as pandas_NAType -from pydantic import BaseModel, validator +from pydantic import ( + BaseModel, + ConfigDict, + ValidationInfo, + field_validator, + model_validator, +) import pudl from pudl.transform.ferc1 import ( @@ -117,6 +123,71 @@ ), } +MANUAL_DBF_METADATA_FIXES: dict[str, dict[str, str]] = { + "less_noncurrent_portion_of_allowances": { + "dbf2020_row_number": 53, + "dbf2020_table_name": "f1_comp_balance_db", + "dbf2020_row_literal": "(Less) Noncurrent Portion of Allowances", + }, + "less_derivative_instrument_assets_long_term": { + "dbf2020_row_number": 64, + "dbf2020_table_name": "f1_comp_balance_db", + "dbf2020_row_literal": "(Less) Long-Term Portion of Derivative Instrument Assets (175)", + }, + "less_derivative_instrument_assets_hedges_long_term": { + "dbf2020_row_number": 66, + "dbf2020_table_name": "f1_comp_balance_db", + "dbf2020_row_literal": "(Less) Long-Term Portion of Derivative Instrument Assets - Hedges (176)", + }, + "less_long_term_portion_of_derivative_instrument_liabilities": { + "dbf2020_row_number": 51, + "dbf2020_table_name": "f1_bal_sheet_cr", + "dbf2020_row_literal": "(Less) Long-Term Portion of Derivative Instrument Liabilities", + }, + "less_long_term_portion_of_derivative_instrument_liabilities_hedges": { + "dbf2020_row_number": 53, + "dbf2020_table_name": "f1_bal_sheet_cr", + "dbf2020_row_literal": "(Less) Long-Term Portion of Derivative Instrument Liabilities-Hedges", + }, + "other_miscellaneous_operating_revenues": { + "dbf2020_row_number": 25, + "dbf2020_table_name": "f1_elctrc_oper_rev", + "dbf2020_row_literal": "", + }, + "amortization_limited_term_electric_plant": { + "dbf2020_row_number": pd.NA, + "dbf2020_table_name": "f1_dacs_epda", + "dbf2020_row_literal": "Amortization of Limited Term Electric Plant (Account 404) (d)", + }, + "amortization_other_electric_plant": { + "dbf2020_row_number": pd.NA, + "dbf2020_table_name": "f1_dacs_epda", + "dbf2020_row_literal": "Amortization of Other Electric Plant (Acc 405) (e)", + }, + "depreciation_amortization_total": { + "dbf2020_row_number": pd.NA, + "dbf2020_table_name": "f1_dacs_epda", + "dbf2020_row_literal": "Total (f)", + }, + "depreciation_expense": { + "dbf2020_row_number": pd.NA, + "dbf2020_table_name": "f1_dacs_epda", + "dbf2020_row_literal": "Depreciation Expense (Account 403) (b)", + }, + "depreciation_expense_asset_retirement": { + "dbf2020_row_number": pd.NA, + "dbf2020_table_name": "f1_dacs_epda", + "dbf2020_row_literal": "Depreciation Expense for Asset Retirement Costs (Account 403.1) (c)", + }, +} +"""Manually compiled metadata from DBF-only or PUDL-generated xbrl_factios. + +Note: the factoids beginning with "less" here could be removed after a transition +of expectations from assuming the calculation components in any given explosion +is a tree structure to being a dag. 
These xbrl_factoids were added in +`transform.ferc1` and could be removed upon this transition. +""" + @asset(io_manager_key="pudl_sqlite_io_manager", compute_kind="Python") def denorm_plants_utilities_ferc1( @@ -1005,11 +1076,29 @@ class NodeId(NamedTuple): @asset def _out_ferc1__explosion_tags(table_dimensions_ferc1) -> pd.DataFrame: - """Grab the stored table of tags and add infered dimension.""" - # NOTE: there are a bunch of duplicate records in xbrl_factoid_rate_base_tags.csv - # Also, these tags are only applicable to the balance_sheet_assets_ferc1 table, but + """Grab the stored tables of tags and add inferred dimension.""" + # Also, these tags may not be applicable to all exploded tables, but # we need to pass in a dataframe with the right structure to all of the exploders, # so we're just re-using this one for the moment. + rate_base_tags = _rate_base_tags(table_dimensions_ferc1=table_dimensions_ferc1) + plant_status_tags = _aggregatable_dimension_tags( + table_dimensions_ferc1=table_dimensions_ferc1, dimension="plant_status" + ) + plant_function_tags = _aggregatable_dimension_tags( + table_dimensions_ferc1=table_dimensions_ferc1, dimension="plant_function" + ) + # We shouldn't have more than one row per tag, so we use a 1:1 validation here. + plant_tags = plant_status_tags.merge( + plant_function_tags, how="outer", on=list(NodeId._fields), validate="1:1" + ) + tags_df = pd.merge( + rate_base_tags, plant_tags, on=list(NodeId._fields), how="outer" + ).astype(pd.StringDtype()) + return tags_df + + +def _rate_base_tags(table_dimensions_ferc1: pd.DataFrame) -> pd.DataFrame: + # NOTE: there are a bunch of duplicate records in xbrl_factoid_rate_base_tags.csv tags_csv = ( importlib.resources.files("pudl.package_data.ferc1") / "xbrl_factoid_rate_base_tags.csv" @@ -1017,14 +1106,7 @@ def _out_ferc1__explosion_tags(table_dimensions_ferc1) -> pd.DataFrame: tags_df = ( pd.read_csv( tags_csv, - usecols=[ - "table_name", - "xbrl_factoid", - "in_rate_base", - "utility_type", - "plant_function", - "plant_status", - ], + usecols=list(NodeId._fields) + ["in_rate_base"], ) .drop_duplicates() .dropna(subset=["table_name", "xbrl_factoid"], how="any") @@ -1033,11 +1115,48 @@ def _out_ferc1__explosion_tags(table_dimensions_ferc1) -> pd.DataFrame: table_dimensions_ferc1, dimensions=["utility_type", "plant_function", "plant_status"], ) - .astype(pd.StringDtype()) ) return tags_df +def _aggregatable_dimension_tags( + table_dimensions_ferc1: pd.DataFrame, + dimension: Literal["plant_status", "plant_function"], +) -> pd.DataFrame: + # make a new lil csv w the manually compiled plant status or dimension + # add in the rest from the table_dims + # merge it into _out_ferc1__explosion_tags + aggregatable_col = f"aggregatable_{dimension}" + tags_csv = ( + importlib.resources.files("pudl.package_data.ferc1") + / f"xbrl_factoid_{dimension}_tags.csv" + ) + dimensions = ["utility_type", "plant_function", "plant_status"] + idx = list(NodeId._fields) + tags_df = ( + pd.read_csv(tags_csv) + .assign(**{dim: pd.NA for dim in dimensions}) + .pipe( + pudl.transform.ferc1.make_calculation_dimensions_explicit, + table_dimensions_ferc1, + dimensions=dimensions, + ) + .astype(pd.StringDtype()) + .set_index(idx) + ) + table_dimensions_ferc1 = table_dimensions_ferc1.set_index(idx) + tags_df = pd.concat( + [ + tags_df, + table_dimensions_ferc1.loc[ + table_dimensions_ferc1.index.difference(tags_df.index) + ], + ] + ).reset_index() + tags_df[aggregatable_col] = tags_df[aggregatable_col].fillna(tags_df[dimension]) + return 
tags_df[tags_df[aggregatable_col] != "total"] + + def exploded_table_asset_factory( root_table: str, table_names_to_explode: list[str], @@ -1129,7 +1248,7 @@ def create_exploded_table_assets() -> list[AssetsDefinition]: NodeId( table_name="balance_sheet_assets_ferc1", xbrl_factoid="assets_and_other_debits", - utility_type=pd.NA, + utility_type="total", plant_status=pd.NA, plant_function=pd.NA, ) @@ -1148,7 +1267,7 @@ def create_exploded_table_assets() -> list[AssetsDefinition]: NodeId( table_name="balance_sheet_liabilities_ferc1", xbrl_factoid="liabilities_and_other_credits", - utility_type=pd.NA, + utility_type="total", plant_status=pd.NA, plant_function=pd.NA, ) @@ -1354,6 +1473,14 @@ def get_dbf_row_metadata(pudl_table: str, year: int = 2020): validate="many_to_one", ) + # Add manual fixes for created factoids + fixes = pd.DataFrame(MANUAL_DBF_METADATA_FIXES).T + exploded_metadata = exploded_metadata.set_index("xbrl_factoid") + # restrict fixes to only those that are actually in the meta. + fixes = fixes.loc[fixes.index.intersection(exploded_metadata.index)] + exploded_metadata.loc[fixes.index, fixes.columns] = fixes + exploded_metadata = exploded_metadata.reset_index() + return exploded_metadata @cached_property @@ -1605,68 +1732,73 @@ class XbrlCalculationForestFerc1(BaseModel): # Not sure if dynamically basing this on NodeId is really a good idea here. calc_cols: list[str] = list(NodeId._fields) - parent_cols: list[str] | None = None exploded_meta: pd.DataFrame = pd.DataFrame() exploded_calcs: pd.DataFrame = pd.DataFrame() seeds: list[NodeId] = [] tags: pd.DataFrame = pd.DataFrame() group_metric_checks: GroupMetricChecks = GroupMetricChecks() + model_config = ConfigDict( + arbitrary_types_allowed=True, ignored_types=(cached_property,) + ) - class Config: - """Allow the class to store a dataframe.""" - - arbitrary_types_allowed = True - keep_untouched = (cached_property,) - - @validator("parent_cols", always=True) - def set_parent_cols(cls, v, values) -> list[str]: - """A convenience property to generate parent column.""" - return [col + "_parent" for col in values["calc_cols"]] + @property + def parent_cols(self: Self) -> list[str]: + """Construct parent_cols based on the provided calc_cols.""" + return [col + "_parent" for col in self.calc_cols] - @validator("exploded_calcs") - def unique_associations(cls, v: pd.DataFrame, values) -> pd.DataFrame: + @model_validator(mode="after") + def unique_associations(self: Self): """Ensure parent-child associations in exploded calculations are unique.""" - pks = values["calc_cols"] + values["parent_cols"] - dupes = v.duplicated(subset=pks, keep=False) + pks = self.calc_cols + self.parent_cols + dupes = self.exploded_calcs.duplicated(subset=pks, keep=False) if dupes.any(): logger.warning( "Consolidating non-unique associations found in exploded_calcs:\n" - f"{v.loc[dupes]}" + f"{self.exploded_calcs.loc[dupes]}" ) - # Drop all duplicates with null weights -- this is a temporary fix to an issue - # from upstream. 
- assert not v.duplicated(subset=pks, keep=False).any() - return v + assert not self.exploded_calcs.duplicated(subset=pks, keep=False).any() + return self - @validator("exploded_calcs") - def calcs_have_required_cols(cls, v: pd.DataFrame, values) -> pd.DataFrame: + @model_validator(mode="after") + def calcs_have_required_cols(self: Self): """Ensure exploded calculations include all required columns.""" - required_cols = values["parent_cols"] + values["calc_cols"] + ["weight"] - missing_cols = [col for col in required_cols if col not in v.columns] + required_cols = self.parent_cols + self.calc_cols + ["weight"] + missing_cols = [ + col for col in required_cols if col not in self.exploded_calcs.columns + ] if missing_cols: raise ValueError( f"Exploded calculations missing expected columns: {missing_cols=}" ) - return v[required_cols] + self.exploded_calcs = self.exploded_calcs.loc[:, required_cols] + return self - @validator("exploded_calcs") - def calc_parents_notna(cls, v: pd.DataFrame) -> pd.DataFrame: + @model_validator(mode="after") + def calc_parents_notna(self: Self): """Ensure that parent table_name and xbrl_factoid columns are non-null.""" - if v[["table_name_parent", "xbrl_factoid_parent"]].isna().any(axis=None): + if ( + self.exploded_calcs[["table_name_parent", "xbrl_factoid_parent"]] + .isna() + .any(axis=None) + ): raise AssertionError("Null parent table name or xbrl_factoid found.") - return v + return self - @validator("tags") - def tags_have_required_cols(cls, v: pd.DataFrame, values) -> pd.DataFrame: + @field_validator("tags") + @classmethod + def tags_have_required_cols( + cls, v: pd.DataFrame, info: ValidationInfo + ) -> pd.DataFrame: """Ensure tagging dataframe contains all required index columns.""" - missing_cols = [col for col in values["calc_cols"] if col not in v.columns] + missing_cols = [col for col in info.data["calc_cols"] if col not in v.columns] if missing_cols: raise ValueError( f"Tagging dataframe was missing expected columns: {missing_cols=}" ) return v - @validator("tags") + @field_validator("tags") + @classmethod def tags_cols_notnull(cls, v: pd.DataFrame) -> pd.DataFrame: """Ensure all tags have non-null table_name and xbrl_factoid.""" null_idx_rows = v[v.table_name.isna() | v.xbrl_factoid.isna()] @@ -1679,29 +1811,30 @@ def tags_cols_notnull(cls, v: pd.DataFrame) -> pd.DataFrame: v = v.dropna(subset=["table_name", "xbrl_factoid"]) return v - @validator("tags") - def single_valued_tags(cls, v: pd.DataFrame, values) -> pd.DataFrame: + @field_validator("tags") + @classmethod + def single_valued_tags(cls, v: pd.DataFrame, info: ValidationInfo) -> pd.DataFrame: """Ensure all tags have unique values.""" - dupes = v.duplicated(subset=values["calc_cols"], keep=False) + dupes = v.duplicated(subset=info.data["calc_cols"], keep=False) if dupes.any(): logger.warning( f"Found {dupes.sum()} duplicate tag records:\n{v.loc[dupes]}" ) return v - @validator("seeds") - def seeds_within_bounds(cls, v: pd.DataFrame, values) -> pd.DataFrame: + @model_validator(mode="after") + def seeds_within_bounds(self: Self): """Ensure that all seeds are present within exploded_calcs index. For some reason this validator is being run before exploded_calcs has been added to the values dictionary, which doesn't make sense, since "seeds" is defined after exploded_calcs in the model. 
""" - all_nodes = values["exploded_calcs"].set_index(values["parent_cols"]).index - bad_seeds = [seed for seed in v if seed not in all_nodes] + all_nodes = self.exploded_calcs.set_index(self.parent_cols).index + bad_seeds = [seed for seed in self.seeds if seed not in all_nodes] if bad_seeds: raise ValueError(f"Seeds missing from exploded_calcs index: {bad_seeds=}") - return v + return self def exploded_calcs_to_digraph( self: Self, @@ -1741,10 +1874,17 @@ def node_attrs(self: Self) -> dict[NodeId, dict[str, dict[str, str]]]: tags_dict = ( self.tags.convert_dtypes().set_index(self.calc_cols).to_dict(orient="index") ) + # Drop None tags created by combining multiple tagging CSVs + clean_tags_dict = { + k: {a: b for a, b in v.items() if b is not None} + for k, v in tags_dict.items() + } node_attrs = ( pd.DataFrame( - index=pd.MultiIndex.from_tuples(tags_dict.keys(), names=self.calc_cols), - data={"tags": list(tags_dict.values())}, + index=pd.MultiIndex.from_tuples( + clean_tags_dict.keys(), names=self.calc_cols + ), + data={"tags": list(clean_tags_dict.values())}, ) .reset_index() # Type conversion is necessary to get pd.NA in the index: @@ -1752,16 +1892,29 @@ def node_attrs(self: Self) -> dict[NodeId, dict[str, dict[str, str]]]: # We need a dictionary for *all* nodes, not just those with tags. .merge( self.exploded_meta.loc[:, self.calc_cols], - how="right", + how="left", on=self.calc_cols, validate="one_to_many", + indicator=True, ) # For nodes with no tags, we assign an empty dictionary: .assign(tags=lambda x: np.where(x["tags"].isna(), {}, x["tags"])) + ) + lefties = node_attrs[ + (node_attrs._merge == "left_only") + & (node_attrs.table_name.isin(self.table_names)) + ] + if not lefties.empty: + logger.warning( + f"Found {len(lefties)} tags that only exist in our manually compiled " + "tags when expected none. Ensure the compiled tags match the metadata." + f"Mismatched tags:\n{lefties}" + ) + return ( + node_attrs.drop(columns=["_merge"]) .set_index(self.calc_cols) .to_dict(orient="index") ) - return node_attrs @cached_property def edge_attrs(self: Self) -> dict[Any, Any]: diff --git a/src/pudl/output/pudltabl.py b/src/pudl/output/pudltabl.py index dc57c7a38b..fe04e62a2a 100644 --- a/src/pudl/output/pudltabl.py +++ b/src/pudl/output/pudltabl.py @@ -45,7 +45,7 @@ class PudlTabl: def __init__( self: Self, - pudl_engine: sa.engine.Engine, + pudl_engine: sa.Engine, freq: Literal["AS", "MS", None] = None, start_date: str | date | datetime | pd.Timestamp = None, end_date: str | date | datetime | pd.Timestamp = None, @@ -94,7 +94,7 @@ def __init__( "PudlTabl needs pudl_engine to be a SQLAlchemy Engine, but we " f"got a {type(pudl_engine)}." 
) - self.pudl_engine: sa.engine.Engine = pudl_engine + self.pudl_engine: sa.Engine = pudl_engine if freq not in (None, "AS", "MS"): raise ValueError( diff --git a/src/pudl/package_data/eia860/file_map.csv b/src/pudl/package_data/eia860/file_map.csv index 8ee4369f17..fabf950288 100644 --- a/src/pudl/package_data/eia860/file_map.csv +++ b/src/pudl/package_data/eia860/file_map.csv @@ -22,4 +22,4 @@ emission_control_strategies,-1,-1,-1,-1,-1,-1,-1,-1,EnviroEquipY09.xls,EnviroEqu cooling_equipment,-1,-1,-1,-1,-1,-1,-1,-1,EnviroEquipY09.xls,EnviroEquipY2010.xls,EnviroEquip.xlsx,EnviroEquipY2012.xlsx,6_2_EnviroEquip_Y2013.xlsx,6_2_EnviroEquip_Y2014.xlsx,6_2_EnviroEquip_Y2015.xlsx,6_2_EnviroEquip_Y2016.xlsx,6_2_EnviroEquip_Y2017.xlsx,6_2_EnviroEquip_Y2018.xlsx,6_2_EnviroEquip_Y2019.xlsx,6_2_EnviroEquip_Y2020.xlsx,6_2_EnviroEquip_Y2021.xlsx,6_2_EnviroEquip_Y2022.xlsx fgp_equipment,-1,-1,-1,-1,-1,-1,-1,-1,EnviroEquipY09.xls,EnviroEquipY2010.xls,EnviroEquip.xlsx,EnviroEquipY2012.xlsx,6_2_EnviroEquip_Y2013.xlsx,6_2_EnviroEquip_Y2014.xlsx,6_2_EnviroEquip_Y2015.xlsx,6_2_EnviroEquip_Y2016.xlsx,6_2_EnviroEquip_Y2017.xlsx,6_2_EnviroEquip_Y2018.xlsx,6_2_EnviroEquip_Y2019.xlsx,6_2_EnviroEquip_Y2020.xlsx,6_2_EnviroEquip_Y2021.xlsx,6_2_EnviroEquip_Y2022.xlsx fgd_equipment,-1,-1,-1,-1,-1,-1,-1,-1,EnviroEquipY09.xls,EnviroEquipY2010.xls,EnviroEquip.xlsx,EnviroEquipY2012.xlsx,6_2_EnviroEquip_Y2013.xlsx,6_2_EnviroEquip_Y2014.xlsx,6_2_EnviroEquip_Y2015.xlsx,6_2_EnviroEquip_Y2016.xlsx,6_2_EnviroEquip_Y2017.xlsx,6_2_EnviroEquip_Y2018.xlsx,6_2_EnviroEquip_Y2019.xlsx,6_2_EnviroEquip_Y2020.xlsx,6_2_EnviroEquip_Y2021.xlsx,6_2_EnviroEquip_Y2022.xlsx -stack_flue_equipment,-1,-1,-1,-1,-1,-1,-1,-1,EnviroEquipY09.xls,EnviroEquipY2010.xls,EnviroEquip.xlsx,EnviroEquipY2012.xlsx,6_2_EnviroEquip_Y2013.xlsx,6_2_EnviroEquip_Y2014.xlsx,6_2_EnviroEquip_Y2015.xlsx,6_2_EnviroEquip_Y2016.xlsx,6_2_EnviroEquip_Y2017.xlsx,6_2_EnviroEquip_Y2018.xlsx,6_2_EnviroEquip_Y2019.xlsx,6_2_EnviroEquip_Y2020.xlsx,6_2_EnviroEquip_Y2021.xlsx,6_2_EnviroEquip_Y2022.xlsx \ No newline at end of file +stack_flue_equipment,-1,-1,-1,-1,-1,-1,-1,-1,EnviroEquipY09.xls,EnviroEquipY2010.xls,EnviroEquip.xlsx,EnviroEquipY2012.xlsx,6_2_EnviroEquip_Y2013.xlsx,6_2_EnviroEquip_Y2014.xlsx,6_2_EnviroEquip_Y2015.xlsx,6_2_EnviroEquip_Y2016.xlsx,6_2_EnviroEquip_Y2017.xlsx,6_2_EnviroEquip_Y2018.xlsx,6_2_EnviroEquip_Y2019.xlsx,6_2_EnviroEquip_Y2020.xlsx,6_2_EnviroEquip_Y2021.xlsx,6_2_EnviroEquip_Y2022.xlsx diff --git a/src/pudl/package_data/eia860/skiprows.csv b/src/pudl/package_data/eia860/skiprows.csv index 4bcc0ddc7e..72a6cc4ab3 100644 --- a/src/pudl/package_data/eia860/skiprows.csv +++ b/src/pudl/package_data/eia860/skiprows.csv @@ -22,4 +22,4 @@ emission_control_strategies,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1 cooling_equipment,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1 fgp_equipment,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1 fgd_equipment,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1 -stack_flue_equipment,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1 \ No newline at end of file +stack_flue_equipment,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1 diff --git a/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv b/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv index a4291398e6..6db1f22b43 100644 --- a/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv +++ b/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv @@ -66,4 +66,4 @@ ash_content_pct_december,ash_content_december,ash_content_december,ash_content_d 
total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity balancing_authority_code_eia,,,,,,,,,,,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,ba_code report_year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year -early_release,,,,,,,,,,,,,,,, \ No newline at end of file +early_release,,,,,,,,,,,,,,,, diff --git a/src/pudl/package_data/eia923/column_maps/emissions_control.csv b/src/pudl/package_data/eia923/column_maps/emissions_control.csv index ba6e1c3fe4..0650bba021 100644 --- a/src/pudl/package_data/eia923/column_maps/emissions_control.csv +++ b/src/pudl/package_data/eia923/column_maps/emissions_control.csv @@ -21,4 +21,4 @@ fgd_sorbent_consumption_1000_tons,fgd_sorbent_quantity_thousand_tons,fgd_sorbent fgd_electricity_consumption_mwh,fgd_electricity_consumption_megawatthours,fgd_electricity_consumption_megawatthours,fgd_electricity_consumption_megawatthours,fgd_electricity_consumption_megawatthours,fgd_electricity_consumption_megawatthours,fgd_electricity_consumption_megawatthours,fgd_electricity_consumption_megawatthours,fgd_electricity_consumption_megawatthours,fgd_electricity_consumption_megawatthours,fgd_electricity_consumption_megawatthours,fgd_electricity_consumption_megawatthours mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency mercury_emission_rate_lb_per_trillion_btu,,,,,mercury_emission_rate,mercury_emission_rate,mercury_emission_rate,mercury_emission_rate,mercury_emission_rate,mercury_emission_rate,mercury_emission_rate -acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency \ No newline at end of file +acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency diff --git a/src/pudl/package_data/eia923/column_maps/fuel_receipts_costs.csv b/src/pudl/package_data/eia923/column_maps/fuel_receipts_costs.csv index 347dea3b38..b40bb32e14 100644 --- a/src/pudl/package_data/eia923/column_maps/fuel_receipts_costs.csv +++ b/src/pudl/package_data/eia923/column_maps/fuel_receipts_costs.csv @@ -31,4 +31,4 @@ natural_gas_delivery_contract_type_code,,,,,,,natural_gas_delivery_contract_type 
moisture_content_pct,,,,,,,moisture_content,moisture_content,moisture_content,moisture_content,moisture_content,moisture_content,moisture_content,moisture_content,moisture_content,moisture_content chlorine_content_ppm,,,,,,,chlorine_content,chlorine_content,chlorine_content,chlorine_content,chlorine_content,chlorine_content,chlorine_content,chlorine_content,chlorine_content,chlorine_content balancing_authority_code_eia,,,,,,,,,,,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,ba_code -early_release,,,,,,,,,,,,,,,, \ No newline at end of file +early_release,,,,,,,,,,,,,,,, diff --git a/src/pudl/package_data/eia923/column_maps/generation_fuel.csv b/src/pudl/package_data/eia923/column_maps/generation_fuel.csv index 07d676a327..8b8703b825 100644 --- a/src/pudl/package_data/eia923/column_maps/generation_fuel.csv +++ b/src/pudl/package_data/eia923/column_maps/generation_fuel.csv @@ -95,4 +95,4 @@ total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtus,total_fuel_consumptio elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours report_year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year -early_release,,,,,,,,,,,,,,,,,,,,,,, \ No newline at end of file +early_release,,,,,,,,,,,,,,,,,,,,,,, diff --git a/src/pudl/package_data/eia923/column_maps/generator.csv b/src/pudl/package_data/eia923/column_maps/generator.csv index 423f7aa4bc..922a0f18d7 100644 --- a/src/pudl/package_data/eia923/column_maps/generator.csv +++ b/src/pudl/package_data/eia923/column_maps/generator.csv @@ -28,4 +28,4 @@ net_generation_mwh_december,net_generation_december,net_generation_december,net_ net_generation_mwh_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date 
balancing_authority_code_eia,,,,,,,,,,,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,ba_code report_year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year -early_release,,,,,,,,,,,,,,,, \ No newline at end of file +early_release,,,,,,,,,,,,,,,, diff --git a/src/pudl/package_data/eia923/column_maps/plant_frame.csv b/src/pudl/package_data/eia923/column_maps/plant_frame.csv index f3e723924b..30b04c412e 100644 --- a/src/pudl/package_data/eia923/column_maps/plant_frame.csv +++ b/src/pudl/package_data/eia923/column_maps/plant_frame.csv @@ -9,4 +9,4 @@ plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name combined_heat_power,combined_heat_and_power_status_y_chp_n_non_chp,combined_heat_and_power_status,combined_heat_and_power_status_y_chp_n_non_chp,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status reporting_frequency_code,reporting_frequency_annual_or_monthly,reporting_frequency,reporting_frequency_annual_or_monthly,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,respondent_frequency,respondent_frequency,respondent_frequency,respondent_frequency nameplate_capacity_mw,nameplate_capacity_mw,,,,,,,,,,,, -early_release,,,,,,,,,,,,, \ No newline at end of file +early_release,,,,,,,,,,,,, diff --git a/src/pudl/package_data/eia923/column_maps/stocks.csv b/src/pudl/package_data/eia923/column_maps/stocks.csv index 23c8f0020e..e04e7f3c69 100644 --- a/src/pudl/package_data/eia923/column_maps/stocks.csv +++ b/src/pudl/package_data/eia923/column_maps/stocks.csv @@ -36,4 +36,4 @@ petcoke_september,petcoke_sep,petcoke_sep,petcoke_sep,petcoke_sep,petcoke_sep,pe petcoke_october,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_october,petcoke_oct,petcoke_october,petcoke_october,petcoke_october,petcoke_october,petcoke_october,petcoke_october,petcoke_october,petcoke_october,petcoke_october,petcoke_october petcoke_november,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_november,petcoke_nov,petcoke_november,petcoke_november,petcoke_november,petcoke_november,petcoke_november,petcoke_november,petcoke_november,petcoke_november,petcoke_november,petcoke_november petcoke_december,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_december,petcoke_dec,petcoke_december,petcoke_december,petcoke_december,petcoke_december,petcoke_december,petcoke_december,petcoke_december,petcoke_december,petcoke_december,petcoke_december -early_release,,,,,,,,,,,,,,,,,,,,,,, \ No newline at end of file +early_release,,,,,,,,,,,,,,,,,,,,,,, diff --git a/src/pudl/package_data/eia923/file_map.csv b/src/pudl/package_data/eia923/file_map.csv index 73dc128416..ecd1d35d59 100644 --- a/src/pudl/package_data/eia923/file_map.csv +++ b/src/pudl/package_data/eia923/file_map.csv @@ -1,14 +1,14 @@ page,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023 
-boiler_fuel,-1,-1,-1,-1,-1,-1,-1,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Early_Release.xlsx,EIA923_Schedules_2_3_4_5_M_04_2023_21JUN2023.xlsx +boiler_fuel,-1,-1,-1,-1,-1,-1,-1,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Final.xlsx,EIA923_Schedules_2_3_4_5_M_08_2023_19OCT2023.xlsx coal_stocks,-1,f906920y2002.xls,f906920_2003.xls,f906920_2004.xls,f906920_2005.xls,f906920_2006.xls,f906920_2007.xls,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,-1,-1,-1 -energy_storage,-1,-1,-1,-1,-1,-1,-1,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Early_Release.xlsx,EIA923_Schedules_2_3_4_5_M_04_2023_21JUN2023.xlsx -fuel_receipts_costs,-1,-1,-1,-1,-1,-1,-1,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 
2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Early_Release.xlsx,EIA923_Schedules_2_3_4_5_M_04_2023_21JUN2023.xlsx -generation_fuel,f906920y2001.xls,f906920y2002.xls,f906920_2003.xls,f906920_2004.xls,f906920_2005.xls,f906920_2006.xls,f906920_2007.xls,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Early_Release.xlsx,EIA923_Schedules_2_3_4_5_M_04_2023_21JUN2023.xlsx -generator,-1,-1,-1,-1,-1,-1,-1,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Early_Release.xlsx,EIA923_Schedules_2_3_4_5_M_04_2023_21JUN2023.xlsx -oil_stocks,-1,f906920y2002.xls,f906920_2003.xls,f906920_2004.xls,f906920_2005.xls,f906920_2006.xls,f906920_2007.xls,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Early_Release.xlsx,EIA923_Schedules_2_3_4_5_M_04_2023_21JUN2023.xlsx +energy_storage,-1,-1,-1,-1,-1,-1,-1,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 
REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Final.xlsx,EIA923_Schedules_2_3_4_5_M_08_2023_19OCT2023.xlsx +fuel_receipts_costs,-1,-1,-1,-1,-1,-1,-1,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Final.xlsx,EIA923_Schedules_2_3_4_5_M_08_2023_19OCT2023.xlsx +generation_fuel,f906920y2001.xls,f906920y2002.xls,f906920_2003.xls,f906920_2004.xls,f906920_2005.xls,f906920_2006.xls,f906920_2007.xls,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Final.xlsx,EIA923_Schedules_2_3_4_5_M_08_2023_19OCT2023.xlsx +generator,-1,-1,-1,-1,-1,-1,-1,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Final.xlsx,EIA923_Schedules_2_3_4_5_M_08_2023_19OCT2023.xlsx +oil_stocks,-1,f906920y2002.xls,f906920_2003.xls,f906920_2004.xls,f906920_2005.xls,f906920_2006.xls,f906920_2007.xls,eia923December2008.xls,EIA923 
SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Final.xlsx,EIA923_Schedules_2_3_4_5_M_08_2023_19OCT2023.xlsx petcoke_stocks,-1,f906920y2002.xls,f906920_2003.xls,f906920_2004.xls,f906920_2005.xls,f906920_2006.xls,f906920_2007.xls,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,-1,-1,-1 -plant_frame,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Early_Release.xlsx,EIA923_Schedules_2_3_4_5_M_04_2023_21JUN2023.xlsx -puerto_rico,-1,-1,-1,-1,-1,-1,-1,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Early_Release.xlsx,EIA923_Schedules_2_3_4_5_M_04_2023_21JUN2023.xlsx -stocks,f906920y2001.xls,f906920y2002.xls,f906920_2003.xls,f906920_2004.xls,f906920_2005.xls,f906920_2006.xls,f906920_2007.xls,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 
2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Early_Release.xlsx,EIA923_Schedules_2_3_4_5_M_04_2023_21JUN2023.xlsx -plant_frame_puerto_rico,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Early_Release.xlsx,EIA923_Schedules_2_3_4_5_M_04_2023_21JUN2023.xlsx -emissions_control,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,EIA923_Schedule_8_Annual_Environmental_Information_2012_Final_Revision.xlsx,EIA923_Schedule_8_PartsA-D_EnvData_2013_Final_Revision.xlsx,EIA923_Schedule_8_Annual_Environmental_Information_2014_Final_Revision.xlsx,EIA923_Schedule_8_Annual_Environmental_Information_2015_Final_Revision.xlsx,EIA923_Schedule_8_Annual_Environmental_Information_2016_Final_Revision.xlsx,EIA923_Schedule_8_Annual_Envir_Infor_2017_Final.xlsx,EIA923_Schedule_8_Annual_Environmental_Information_2018_Final.xlsx,EIA923_Schedule_8_Annual_Environmental_Information_2019_Final_Revision.xlsx,EIA923_Schedule_8_Annual_Environmental_Information_2020_Final_Revision.xlsx,EIA923_Schedule_8_Annual_Environmental_Information_2021_Final_Revision.xlsx,EIA923_Schedule_8_Annual_Environmental_Information_2022_Early_Release.xlsx,-1 \ No newline at end of file +plant_frame,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Final.xlsx,EIA923_Schedules_2_3_4_5_M_08_2023_19OCT2023.xlsx +puerto_rico,-1,-1,-1,-1,-1,-1,-1,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Final.xlsx,EIA923_Schedules_2_3_4_5_M_08_2023_19OCT2023.xlsx 
+stocks,f906920y2001.xls,f906920y2002.xls,f906920_2003.xls,f906920_2004.xls,f906920_2005.xls,f906920_2006.xls,f906920_2007.xls,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Final.xlsx,EIA923_Schedules_2_3_4_5_M_08_2023_19OCT2023.xlsx +plant_frame_puerto_rico,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2020_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2022_Final.xlsx,EIA923_Schedules_2_3_4_5_M_08_2023_19OCT2023.xlsx +emissions_control,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,EIA923_Schedule_8_Annual_Environmental_Information_2012_Final_Revision.xlsx,EIA923_Schedule_8_PartsA-D_EnvData_2013_Final_Revision.xlsx,EIA923_Schedule_8_Annual_Environmental_Information_2014_Final_Revision.xlsx,EIA923_Schedule_8_Annual_Environmental_Information_2015_Final_Revision.xlsx,EIA923_Schedule_8_Annual_Environmental_Information_2016_Final_Revision.xlsx,EIA923_Schedule_8_Annual_Envir_Infor_2017_Final.xlsx,EIA923_Schedule_8_Annual_Environmental_Information_2018_Final.xlsx,EIA923_Schedule_8_Annual_Environmental_Information_2019_Final_Revision.xlsx,EIA923_Schedule_8_Annual_Environmental_Information_2020_Final_Revision.xlsx,EIA923_Schedule_8_Annual_Environmental_Information_2021_Final_Revision.xlsx,EIA923_Schedule_8_Annual_Environmental_Information_2022_Early_Release.xlsx,-1 diff --git a/src/pudl/package_data/eia923/page_map.csv b/src/pudl/package_data/eia923/page_map.csv index f66e5296eb..2134f597f6 100644 --- a/src/pudl/package_data/eia923/page_map.csv +++ b/src/pudl/package_data/eia923/page_map.csv @@ -11,4 +11,4 @@ generator,-1,-1,-1,-1,-1,-1,-1,6,6,6,6,6,6,7,7,7,8,8,8,8,8,6,6 fuel_receipts_costs,-1,-1,-1,-1,-1,-1,-1,7,7,7,7,7,7,8,8,8,9,9,9,9,9,7,7 plant_frame,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,8,8,8,9,9,9,10,10,10,10,10,8,8 plant_frame_puerto_rico,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,11,-1,11,11,11,9,9 -emissions_control,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,2,2,2,2,2,2,2,2,2,2,2,-1 \ No newline at end of file +emissions_control,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,2,2,2,2,2,2,2,2,2,2,2,-1 diff --git a/src/pudl/package_data/eia923/skipfooter.csv b/src/pudl/package_data/eia923/skipfooter.csv index ccaa9d6405..17088feb43 100644 --- a/src/pudl/package_data/eia923/skipfooter.csv +++ b/src/pudl/package_data/eia923/skipfooter.csv @@ -11,4 +11,4 @@ generator,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 fuel_receipts_costs,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 plant_frame,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 plant_frame_puerto_rico,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -emissions_control,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 \ No newline at end of file +emissions_control,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git 
a/src/pudl/package_data/eia923/skiprows.csv b/src/pudl/package_data/eia923/skiprows.csv index 758744ab14..f608d488e1 100644 --- a/src/pudl/package_data/eia923/skiprows.csv +++ b/src/pudl/package_data/eia923/skiprows.csv @@ -1,14 +1,14 @@ year_index,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023 -generation_fuel,7,7,7,7,7,7,7,7,7,7,5,5,5,5,5,5,5,5,5,5,5,6,5 -puerto_rico,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,5,5,6,6,6,7,5 -stocks,7,7,7,7,7,7,7,7,7,7,5,5,5,5,5,5,5,5,5,5,5,6,4 -oil_stocks,-1,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,4 +generation_fuel,7,7,7,7,7,7,7,7,7,7,5,5,5,5,5,5,5,5,5,5,5,5,5 +puerto_rico,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,5,5,6,6,6,6,5 +stocks,7,7,7,7,7,7,7,7,7,7,5,5,5,5,5,5,5,5,5,5,5,5,4 +oil_stocks,-1,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,4 coal_stocks,-1,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,-1,-1,-1 petcoke_stocks,-1,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,-1,-1,-1 -energy_storage,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,5,5,5,5,6,4 -boiler_fuel,-1,-1,-1,-1,-1,-1,-1,7,7,7,5,5,5,5,5,5,5,5,5,5,5,6,4 -generator,-1,-1,-1,-1,-1,-1,-1,7,7,7,5,5,5,5,5,5,5,5,5,5,5,6,4 -fuel_receipts_costs,-1,-1,-1,-1,-1,-1,-1,7,6,7,4,4,4,4,4,4,4,4,4,4,4,5,3 -plant_frame,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,4,4,4,4,4,4,4,4,4,4,4,5,3 -plant_frame_puerto_rico,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,4,4,4,5,3 -emissions_control,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,4,4,4,4,4,4,4,4,4,4,5,-1 \ No newline at end of file +energy_storage,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,5,5,5,5,5,4 +boiler_fuel,-1,-1,-1,-1,-1,-1,-1,7,7,7,5,5,5,5,5,5,5,5,5,5,5,5,4 +generator,-1,-1,-1,-1,-1,-1,-1,7,7,7,5,5,5,5,5,5,5,5,5,5,5,5,4 +fuel_receipts_costs,-1,-1,-1,-1,-1,-1,-1,7,6,7,4,4,4,4,4,4,4,4,4,4,4,4,3 +plant_frame,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,4,4,4,4,4,4,4,4,4,4,4,4,3 +plant_frame_puerto_rico,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,4,4,4,4,3 +emissions_control,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,4,4,4,4,4,4,4,4,4,4,4,-1 diff --git a/src/pudl/package_data/ferc1/xbrl_calculation_component_fixes.csv b/src/pudl/package_data/ferc1/xbrl_calculation_component_fixes.csv index b0e6b7398f..a39c371bc8 100644 --- a/src/pudl/package_data/ferc1/xbrl_calculation_component_fixes.csv +++ b/src/pudl/package_data/ferc1/xbrl_calculation_component_fixes.csv @@ -1,42 +1,43 @@ -table_name_parent,xbrl_factoid_parent,table_name,xbrl_factoid,weight,utility_type,plant_function,plant_status +table_name_parent,xbrl_factoid_parent,table_name,xbrl_factoid,utility_type,plant_status,plant_function,weight balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,abandonment_of_leases,,,, -balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,1.0,total,, +balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,total,,,1.0 balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,amortization_of_other_utility_plant_utility_plant_in_service,,,, 
balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,amortization_of_plant_acquisition_adjustment,,,, balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,depreciation_amortization_and_depletion_utility_plant_in_service,,,, balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,depreciation_amortization_and_depletion_utility_plant_leased_to_others,,,, balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,depreciation_and_amortization_utility_plant_held_for_future_use,,,, balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,depreciation_utility_plant_in_service,,,, -balance_sheet_assets_ferc1,construction_work_in_progress,utility_plant_summary_ferc1,construction_work_in_progress,1.0,total,, +balance_sheet_assets_ferc1,construction_work_in_progress,utility_plant_summary_ferc1,construction_work_in_progress,total,,,1.0 balance_sheet_assets_ferc1,current_and_accrued_assets,balance_sheet_assets_ferc1,derivative_instrument_assets_hedges_long_term,,,, balance_sheet_assets_ferc1,current_and_accrued_assets,balance_sheet_assets_ferc1,derivative_instrument_assets_long_term,,,, -balance_sheet_assets_ferc1,current_and_accrued_assets,balance_sheet_assets_ferc1,less_derivative_instrument_assets_hedges_long_term,-1.0,,, -balance_sheet_assets_ferc1,current_and_accrued_assets,balance_sheet_assets_ferc1,less_derivative_instrument_assets_long_term,-1.0,,, -balance_sheet_assets_ferc1,current_and_accrued_assets,balance_sheet_assets_ferc1,less_noncurrent_portion_of_allowances,-1.0,,, +balance_sheet_assets_ferc1,current_and_accrued_assets,balance_sheet_assets_ferc1,less_derivative_instrument_assets_hedges_long_term,total,,,-1.0 +balance_sheet_assets_ferc1,current_and_accrued_assets,balance_sheet_assets_ferc1,less_derivative_instrument_assets_long_term,total,,,-1.0 +balance_sheet_assets_ferc1,current_and_accrued_assets,balance_sheet_assets_ferc1,less_noncurrent_portion_of_allowances,total,,,-1.0 balance_sheet_assets_ferc1,current_and_accrued_assets,balance_sheet_assets_ferc1,noncurrent_portion_of_allowances,,,, -balance_sheet_assets_ferc1,deferred_debits,balance_sheet_assets_ferc1,preliminary_natural_gas_survey_and_investigation_charges,1.0,,, -balance_sheet_assets_ferc1,deferred_debits,balance_sheet_assets_ferc1,preliminary_natural_gas_and_other_survey_and_investigation_charges,1.0,,, -balance_sheet_assets_ferc1,nuclear_fuel_net,balance_sheet_assets_ferc1,nuclear_fuel,1.0,,, +balance_sheet_assets_ferc1,deferred_debits,balance_sheet_assets_ferc1,preliminary_natural_gas_survey_and_investigation_charges,total,,,1.0 +balance_sheet_assets_ferc1,deferred_debits,balance_sheet_assets_ferc1,preliminary_natural_gas_and_other_survey_and_investigation_charges,total,,,1.0 +balance_sheet_assets_ferc1,nuclear_fuel_net,balance_sheet_assets_ferc1,nuclear_fuel,total,,,1.0 balance_sheet_assets_ferc1,nuclear_fuel_net,nuclear_fuel_materials_ferc1,nuclear_fuel_materials_and_assemblies,,,, balance_sheet_assets_ferc1,nuclear_fuel_net,nuclear_fuel_materials_ferc1,spent_nuclear_fuel,,,, -balance_sheet_assets_ferc1,other_property_and_investments,balance_sheet_assets_ferc1,special_funds_all,1.0,,, 
+balance_sheet_assets_ferc1,other_property_and_investments,balance_sheet_assets_ferc1,special_funds_all,total,,,1.0 balance_sheet_assets_ferc1,utility_plant_and_construction_work_in_progress,balance_sheet_assets_ferc1,construction_work_in_progress,,,, balance_sheet_assets_ferc1,utility_plant_and_construction_work_in_progress,balance_sheet_assets_ferc1,utility_plant,,,, balance_sheet_assets_ferc1,utility_plant_and_construction_work_in_progress,utility_plant_summary_ferc1,utility_plant_acquisition_adjustment,,,, -balance_sheet_assets_ferc1,utility_plant_and_construction_work_in_progress,utility_plant_summary_ferc1,utility_plant_and_construction_work_in_progress,1.0,total,, +balance_sheet_assets_ferc1,utility_plant_and_construction_work_in_progress,utility_plant_summary_ferc1,utility_plant_and_construction_work_in_progress,total,,,1.0 balance_sheet_assets_ferc1,utility_plant_and_construction_work_in_progress,utility_plant_summary_ferc1,utility_plant_held_for_future_use,,,, balance_sheet_assets_ferc1,utility_plant_and_construction_work_in_progress,utility_plant_summary_ferc1,utility_plant_in_service_classified_and_unclassified,,,, balance_sheet_assets_ferc1,utility_plant_and_construction_work_in_progress,utility_plant_summary_ferc1,utility_plant_leased_to_others,,,, balance_sheet_assets_ferc1,utility_plant_net,balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,,,, balance_sheet_assets_ferc1,utility_plant_net,balance_sheet_assets_ferc1,utility_plant_and_construction_work_in_progress,,,, -balance_sheet_assets_ferc1,utility_plant_net,utility_plant_summary_ferc1,utility_plant_net,1.0,total,, -balance_sheet_liabilities_ferc1,deferred_credits,balance_sheet_liabilities_ferc1,accumulated_deferred_income_taxes,1.0,,, -balance_sheet_liabilities_ferc1,retained_earnings,retained_earnings_ferc1,retained_earnings,1.0,,, +balance_sheet_assets_ferc1,utility_plant_net,utility_plant_summary_ferc1,utility_plant_net,total,,,1.0 +balance_sheet_liabilities_ferc1,deferred_credits,balance_sheet_liabilities_ferc1,accumulated_deferred_income_taxes,total,,,1.0 +balance_sheet_liabilities_ferc1,retained_earnings,retained_earnings_ferc1,retained_earnings,total,,,1.0 balance_sheet_liabilities_ferc1,current_and_accrued_liabilities,balance_sheet_liabilities_ferc1,long_term_portion_of_derivative_instrument_liabilities,,,, -balance_sheet_liabilities_ferc1,current_and_accrued_liabilities,balance_sheet_liabilities_ferc1,less_long_term_portion_of_derivative_instrument_liabilities,-1.0,,, +balance_sheet_liabilities_ferc1,current_and_accrued_liabilities,balance_sheet_liabilities_ferc1,less_long_term_portion_of_derivative_instrument_liabilities,total,,,-1.0 balance_sheet_liabilities_ferc1,current_and_accrued_liabilities,balance_sheet_liabilities_ferc1,long_term_portion_of_derivative_instrument_liabilities_hedges,,,, -balance_sheet_liabilities_ferc1,current_and_accrued_liabilities,balance_sheet_liabilities_ferc1,less_long_term_portion_of_derivative_instrument_liabilities_hedges,-1.0,,, -electric_energy_sources_ferc1,sources_of_energy,electric_energy_sources_ferc1,megawatt_hours_purchased,1.0,,, +balance_sheet_liabilities_ferc1,current_and_accrued_liabilities,balance_sheet_liabilities_ferc1,less_long_term_portion_of_derivative_instrument_liabilities_hedges,total,,,-1.0 +balance_sheet_liabilities_ferc1,unappropriated_undistributed_subsidiary_earnings,retained_earnings_ferc1,unappropriated_undistributed_subsidiary_earnings,total,,,1.0 
+electric_energy_sources_ferc1,sources_of_energy,electric_energy_sources_ferc1,megawatt_hours_purchased,,,,1.0 electric_operating_expenses_ferc1,power_production_expenses_hydraulic_power,electric_operating_expenses_ferc1,electric_expenses_hydraulic_power_generation,,,, electric_operating_expenses_ferc1,power_production_expenses_hydraulic_power,electric_operating_expenses_ferc1,hydraulic_expenses,,,, electric_operating_expenses_ferc1,power_production_expenses_hydraulic_power,electric_operating_expenses_ferc1,maintenance_of_electric_plant_hydraulic_power_generation,,,, @@ -67,82 +68,81 @@ electric_operating_expenses_ferc1,power_production_expenses_steam_power,electric electric_operating_expenses_ferc1,power_production_expenses_steam_power,plants_hydro_ferc1,opex_operations,,,, electric_operating_expenses_ferc1,power_production_expenses_steam_power,plants_pumped_storage_ferc1,opex_operations,,,, electric_operating_expenses_ferc1,power_production_expenses_steam_power,plants_steam_ferc1,opex_operations,,,, -electric_operating_expenses_ferc1,transmission_operation_expense,electric_operating_expenses_ferc1,load_dispatching_transmission_expense,1.0,,, -electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,forfeited_discounts,1.0,,, -electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,interdepartmental_rents,1.0,,, -electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,miscellaneous_revenue,1.0,,, -electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,miscellaneous_service_revenues,1.0,,, -electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,other_electric_revenue,1.0,,, -electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,other_miscellaneous_operating_revenues,1.0,,, -electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,regional_transmission_service_revenues,1.0,,, -electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,rent_from_electric_property,1.0,,, -electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,revenues_from_transmission_of_electricity_of_others,1.0,,, -electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,sales_of_water_and_water_power,1.0,,, -electric_operating_revenues_ferc1,sales_to_ultimate_consumers,electric_operating_revenues_ferc1,large_or_industrial,1.0,,, -electric_operating_revenues_ferc1,sales_to_ultimate_consumers,electric_operating_revenues_ferc1,small_or_commercial,1.0,,, +electric_operating_expenses_ferc1,transmission_operation_expense,electric_operating_expenses_ferc1,load_dispatching_transmission_expense,electric,,,1.0 +electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,forfeited_discounts,electric,,,1.0 +electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,interdepartmental_rents,electric,,,1.0 +electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,miscellaneous_revenue,electric,,,1.0 +electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,miscellaneous_service_revenues,electric,,,1.0 +electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,other_electric_revenue,electric,,,1.0 
+electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,other_miscellaneous_operating_revenues,electric,,,1.0 +electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,regional_transmission_service_revenues,electric,,,1.0 +electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,rent_from_electric_property,electric,,,1.0 +electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,revenues_from_transmission_of_electricity_of_others,electric,,,1.0 +electric_operating_revenues_ferc1,other_operating_revenues,electric_operating_revenues_ferc1,sales_of_water_and_water_power,electric,,,1.0 +electric_operating_revenues_ferc1,sales_to_ultimate_consumers,electric_operating_revenues_ferc1,large_or_industrial,electric,,,1.0 +electric_operating_revenues_ferc1,sales_to_ultimate_consumers,electric_operating_revenues_ferc1,small_or_commercial,electric,,,1.0 electric_operating_revenues_ferc1,sales_to_ultimate_consumers,electricity_sales_by_rate_schedule_ferc1,commercial_and_industrial,,,, -electric_plant_depreciation_changes_ferc1,ending_balance,electric_plant_depreciation_changes_ferc1,book_cost_of_asset_retirement_costs,1.0,,, -electric_plant_depreciation_changes_ferc1,ending_balance,electric_plant_depreciation_changes_ferc1,depreciation_provision,1.0,,, -electric_plant_depreciation_changes_ferc1,ending_balance,electric_plant_depreciation_changes_ferc1,net_charges_for_retired_plant,1.0,,, -electric_plant_depreciation_changes_ferc1,ending_balance,electric_plant_depreciation_changes_ferc1,other_adjustments_to_accumulated_depreciation,1.0,,, -electric_plant_depreciation_changes_ferc1,ending_balance,electric_plant_depreciation_changes_ferc1,starting_balance,1.0,,, -income_statement_ferc1,amortization_and_depletion_of_utility_plant,depreciation_amortization_summary_ferc1,amortization_limited_term_electric_plant,1.0,electric,total, -income_statement_ferc1,amortization_and_depletion_of_utility_plant,depreciation_amortization_summary_ferc1,amortization_other_electric_plant,1.0,electric,total, -income_statement_ferc1,depreciation_expense,depreciation_amortization_summary_ferc1,depreciation_expense,1.0,electric,total, -income_statement_ferc1,depreciation_expense_for_asset_retirement_costs,depreciation_amortization_summary_ferc1,depreciation_expense_asset_retirement,1.0,electric,total, -income_statement_ferc1,income_before_extraordinary_items,income_statement_ferc1,net_utility_operating_income,1.0,,, -income_statement_ferc1,maintenance_expense,electric_operating_expenses_ferc1,distribution_maintenance_expense_electric,1.0,electric,, -income_statement_ferc1,maintenance_expense,electric_operating_expenses_ferc1,hydraulic_power_generation_maintenance_expense,1.0,electric,, -income_statement_ferc1,maintenance_expense,electric_operating_expenses_ferc1,maintenance_of_general_plant,1.0,electric,, -income_statement_ferc1,maintenance_expense,electric_operating_expenses_ferc1,nuclear_power_generation_maintenance_expense,1.0,electric,, -income_statement_ferc1,maintenance_expense,electric_operating_expenses_ferc1,other_power_generation_maintenance_expense,1.0,electric,, -income_statement_ferc1,maintenance_expense,electric_operating_expenses_ferc1,regional_market_maintenance_expense,1.0,electric,, -income_statement_ferc1,maintenance_expense,electric_operating_expenses_ferc1,steam_power_generation_maintenance_expense,1.0,electric,, 
-income_statement_ferc1,maintenance_expense,electric_operating_expenses_ferc1,transmission_maintenance_expense_electric,1.0,electric,, -income_statement_ferc1,operating_revenues,electric_operating_revenues_ferc1,electric_operating_revenues,1.0,electric,, -income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,administrative_and_general_operation_expense,1.0,electric,, -income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,customer_account_expenses,1.0,electric,, -income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,customer_service_and_information_expenses,1.0,electric,, -income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,distribution_operation_expenses_electric,1.0,electric,, -income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,hydraulic_power_generation_operations_expense,1.0,electric,, -income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,nuclear_power_generation_operations_expense,1.0,electric,, -income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,other_power_generation_operations_expense,1.0,electric,, -income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,regional_market_operation_expense,1.0,electric,, -income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,sales_expenses,1.0,electric,, -income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,steam_power_generation_operations_expense,1.0,electric,, -income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,transmission_operation_expense,1.0,electric,, -income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,power_production_expenses,1.0,electric,, -income_statement_ferc1,other_income_deductions,income_statement_ferc1,miscellaneous_deductions,1.0,,, -income_statement_ferc1,taxes_on_other_income_and_deductions,income_statement_ferc1,investment_tax_credits,-1.0,,, -plant_in_service_ferc1,electric_plant_in_service_and_completed_construction_not_classified_electric,plant_in_service_ferc1,distribution_plant,1.0,,, -plant_in_service_ferc1,electric_plant_in_service_and_completed_construction_not_classified_electric,plant_in_service_ferc1,general_plant,1.0,,, -plant_in_service_ferc1,electric_plant_in_service_and_completed_construction_not_classified_electric,plant_in_service_ferc1,intangible_plant,1.0,,, -plant_in_service_ferc1,electric_plant_in_service_and_completed_construction_not_classified_electric,plant_in_service_ferc1,production_plant,1.0,,, -plant_in_service_ferc1,electric_plant_in_service_and_completed_construction_not_classified_electric,plant_in_service_ferc1,transmission_and_market_operation_plant_regional_transmission_and_market_operation_plant,1.0,,, -plant_in_service_ferc1,electric_plant_in_service_and_completed_construction_not_classified_electric,plant_in_service_ferc1,transmission_plant,1.0,,, -retained_earnings_ferc1,appropriated_retained_earnings_including_reserve_amortization,retained_earnings_ferc1,appropriated_retained_earnings,1.0,,, -retained_earnings_ferc1,appropriated_retained_earnings_including_reserve_amortization,retained_earnings_ferc1,appropriated_retained_earnings_amortization_reserve_federal,1.0,,, -retained_earnings_ferc1,retained_earnings,retained_earnings_ferc1,appropriated_retained_earnings_including_reserve_amortization,1.0,,, -retained_earnings_ferc1,retained_earnings,retained_earnings_ferc1,unappropriated_retained_earnings,1.0,,, 
-retained_earnings_ferc1,unappropriated_retained_earnings,retained_earnings_ferc1,adjustments_to_retained_earnings_credit,1.0,,, -retained_earnings_ferc1,unappropriated_retained_earnings,retained_earnings_ferc1,adjustments_to_retained_earnings_debit,1.0,,, -retained_earnings_ferc1,unappropriated_retained_earnings,retained_earnings_ferc1,appropriations_of_retained_earnings,1.0,,, -retained_earnings_ferc1,unappropriated_retained_earnings,retained_earnings_ferc1,balance_transferred_from_income,1.0,,, -retained_earnings_ferc1,unappropriated_retained_earnings,retained_earnings_ferc1,dividends_declared_common_stock,1.0,,, -retained_earnings_ferc1,unappropriated_retained_earnings,retained_earnings_ferc1,dividends_declared_preferred_stock,1.0,,, -retained_earnings_ferc1,unappropriated_retained_earnings,retained_earnings_ferc1,transfers_from_unappropriated_undistributed_subsidiary_earnings,1.0,,, -retained_earnings_ferc1,unappropriated_retained_earnings,retained_earnings_ferc1,unappropriated_retained_earnings_previous_year,1.0,,, -retained_earnings_ferc1,unappropriated_undistributed_subsidiary_earnings,retained_earnings_ferc1,changes_unappropriated_undistributed_subsidiary_earnings_credits,1.0,,, -retained_earnings_ferc1,unappropriated_undistributed_subsidiary_earnings,retained_earnings_ferc1,dividends_received,-1.0,,, -retained_earnings_ferc1,unappropriated_undistributed_subsidiary_earnings,retained_earnings_ferc1,equity_in_earnings_of_subsidiary_companies,1.0,,, -retained_earnings_ferc1,unappropriated_undistributed_subsidiary_earnings,retained_earnings_ferc1,unappropriated_undistributed_subsidiary_earnings_previous_year,1.0,,, -balance_sheet_liabilities_ferc1,unappropriated_undistributed_subsidiary_earnings,retained_earnings_ferc1,unappropriated_undistributed_subsidiary_earnings,1.0,,, +electric_plant_depreciation_changes_ferc1,ending_balance,electric_plant_depreciation_changes_ferc1,book_cost_of_asset_retirement_costs,electric,,,1.0 +electric_plant_depreciation_changes_ferc1,ending_balance,electric_plant_depreciation_changes_ferc1,depreciation_provision,electric,,,1.0 +electric_plant_depreciation_changes_ferc1,ending_balance,electric_plant_depreciation_changes_ferc1,net_charges_for_retired_plant,electric,,,1.0 +electric_plant_depreciation_changes_ferc1,ending_balance,electric_plant_depreciation_changes_ferc1,other_adjustments_to_accumulated_depreciation,electric,,,1.0 +electric_plant_depreciation_changes_ferc1,ending_balance,electric_plant_depreciation_changes_ferc1,starting_balance,electric,,,1.0 +income_statement_ferc1,amortization_and_depletion_of_utility_plant,depreciation_amortization_summary_ferc1,amortization_limited_term_electric_plant,electric,,total,1.0 +income_statement_ferc1,amortization_and_depletion_of_utility_plant,depreciation_amortization_summary_ferc1,amortization_other_electric_plant,electric,,total,1.0 +income_statement_ferc1,depreciation_expense,depreciation_amortization_summary_ferc1,depreciation_expense,electric,,total,1.0 +income_statement_ferc1,depreciation_expense_for_asset_retirement_costs,depreciation_amortization_summary_ferc1,depreciation_expense_asset_retirement,electric,,total,1.0 +income_statement_ferc1,income_before_extraordinary_items,income_statement_ferc1,net_utility_operating_income,,,,1.0 +income_statement_ferc1,maintenance_expense,electric_operating_expenses_ferc1,distribution_maintenance_expense_electric,electric,,,1.0 
+income_statement_ferc1,maintenance_expense,electric_operating_expenses_ferc1,hydraulic_power_generation_maintenance_expense,electric,,,1.0 +income_statement_ferc1,maintenance_expense,electric_operating_expenses_ferc1,maintenance_of_general_plant,electric,,,1.0 +income_statement_ferc1,maintenance_expense,electric_operating_expenses_ferc1,nuclear_power_generation_maintenance_expense,electric,,,1.0 +income_statement_ferc1,maintenance_expense,electric_operating_expenses_ferc1,other_power_generation_maintenance_expense,electric,,,1.0 +income_statement_ferc1,maintenance_expense,electric_operating_expenses_ferc1,regional_market_maintenance_expense,electric,,,1.0 +income_statement_ferc1,maintenance_expense,electric_operating_expenses_ferc1,steam_power_generation_maintenance_expense,electric,,,1.0 +income_statement_ferc1,maintenance_expense,electric_operating_expenses_ferc1,transmission_maintenance_expense_electric,electric,,,1.0 +income_statement_ferc1,operating_revenues,electric_operating_revenues_ferc1,electric_operating_revenues,electric,,,1.0 +income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,administrative_and_general_operation_expense,electric,,,1.0 +income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,customer_account_expenses,electric,,,1.0 +income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,customer_service_and_information_expenses,electric,,,1.0 +income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,distribution_operation_expenses_electric,electric,,,1.0 +income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,hydraulic_power_generation_operations_expense,electric,,,1.0 +income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,nuclear_power_generation_operations_expense,electric,,,1.0 +income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,other_power_generation_operations_expense,electric,,,1.0 +income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,regional_market_operation_expense,electric,,,1.0 +income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,sales_expenses,electric,,,1.0 +income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,steam_power_generation_operations_expense,electric,,,1.0 +income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,transmission_operation_expense,electric,,,1.0 +income_statement_ferc1,operation_expense,electric_operating_expenses_ferc1,power_production_expenses,electric,,,1.0 +income_statement_ferc1,other_income_deductions,income_statement_ferc1,miscellaneous_deductions,,,,1.0 +income_statement_ferc1,taxes_on_other_income_and_deductions,income_statement_ferc1,investment_tax_credits,,,,-1.0 +plant_in_service_ferc1,electric_plant_in_service_and_completed_construction_not_classified_electric,plant_in_service_ferc1,distribution_plant,electric,,,1.0 +plant_in_service_ferc1,electric_plant_in_service_and_completed_construction_not_classified_electric,plant_in_service_ferc1,general_plant,electric,,,1.0 +plant_in_service_ferc1,electric_plant_in_service_and_completed_construction_not_classified_electric,plant_in_service_ferc1,intangible_plant,electric,,,1.0 +plant_in_service_ferc1,electric_plant_in_service_and_completed_construction_not_classified_electric,plant_in_service_ferc1,production_plant,electric,,,1.0 
+plant_in_service_ferc1,electric_plant_in_service_and_completed_construction_not_classified_electric,plant_in_service_ferc1,transmission_and_market_operation_plant_regional_transmission_and_market_operation_plant,electric,,,1.0 +plant_in_service_ferc1,electric_plant_in_service_and_completed_construction_not_classified_electric,plant_in_service_ferc1,transmission_plant,electric,,,1.0 +retained_earnings_ferc1,appropriated_retained_earnings_including_reserve_amortization,retained_earnings_ferc1,appropriated_retained_earnings,total,,,1.0 +retained_earnings_ferc1,appropriated_retained_earnings_including_reserve_amortization,retained_earnings_ferc1,appropriated_retained_earnings_amortization_reserve_federal,total,,,1.0 +retained_earnings_ferc1,retained_earnings,retained_earnings_ferc1,appropriated_retained_earnings_including_reserve_amortization,total,,,1.0 +retained_earnings_ferc1,retained_earnings,retained_earnings_ferc1,unappropriated_retained_earnings,total,,,1.0 +retained_earnings_ferc1,unappropriated_retained_earnings,retained_earnings_ferc1,adjustments_to_retained_earnings_credit,total,,,1.0 +retained_earnings_ferc1,unappropriated_retained_earnings,retained_earnings_ferc1,adjustments_to_retained_earnings_debit,total,,,1.0 +retained_earnings_ferc1,unappropriated_retained_earnings,retained_earnings_ferc1,appropriations_of_retained_earnings,total,,,1.0 +retained_earnings_ferc1,unappropriated_retained_earnings,retained_earnings_ferc1,balance_transferred_from_income,total,,,1.0 +retained_earnings_ferc1,unappropriated_retained_earnings,retained_earnings_ferc1,dividends_declared_common_stock,total,,,1.0 +retained_earnings_ferc1,unappropriated_retained_earnings,retained_earnings_ferc1,dividends_declared_preferred_stock,total,,,1.0 +retained_earnings_ferc1,unappropriated_retained_earnings,retained_earnings_ferc1,transfers_from_unappropriated_undistributed_subsidiary_earnings,total,,,1.0 +retained_earnings_ferc1,unappropriated_retained_earnings,retained_earnings_ferc1,unappropriated_retained_earnings_previous_year,total,,,1.0 +retained_earnings_ferc1,unappropriated_undistributed_subsidiary_earnings,retained_earnings_ferc1,changes_unappropriated_undistributed_subsidiary_earnings_credits,total,,,1.0 +retained_earnings_ferc1,unappropriated_undistributed_subsidiary_earnings,retained_earnings_ferc1,dividends_received,total,,,-1.0 +retained_earnings_ferc1,unappropriated_undistributed_subsidiary_earnings,retained_earnings_ferc1,equity_in_earnings_of_subsidiary_companies,total,,,1.0 +retained_earnings_ferc1,unappropriated_undistributed_subsidiary_earnings,retained_earnings_ferc1,unappropriated_undistributed_subsidiary_earnings_previous_year,total,,,1.0 utility_plant_summary_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,depreciation_amortization_and_depletion_utility_plant_in_service,,,, -utility_plant_summary_ferc1,depreciation_utility_plant_in_service,electric_plant_depreciation_functional_ferc1,accumulated_depreciation,1.0,electric,total,in_service +utility_plant_summary_ferc1,depreciation_utility_plant_in_service,electric_plant_depreciation_functional_ferc1,accumulated_depreciation,electric,in_service,total,1.0 utility_plant_summary_ferc1,utility_plant_in_service_classified_and_unclassified,utility_plant_summary_ferc1,utility_plant_in_service_classified,,,, 
-utility_plant_summary_ferc1,utility_plant_in_service_classified_and_unclassified,utility_plant_summary_ferc1,utility_plant_in_service_classified_and_property_under_capital_leases,1.0,,, +utility_plant_summary_ferc1,utility_plant_in_service_classified_and_unclassified,utility_plant_summary_ferc1,utility_plant_in_service_classified_and_property_under_capital_leases,,,,1.0 utility_plant_summary_ferc1,utility_plant_in_service_classified_and_unclassified,utility_plant_summary_ferc1,utility_plant_in_service_property_under_capital_leases,,,, -utility_plant_summary_ferc1,utility_plant_in_service_experimental_plant_unclassified,plant_in_service_ferc1,experimental_electric_plant_unclassified,1.0,electric,, -utility_plant_summary_ferc1,utility_plant_in_service_plant_purchased_or_sold,plant_in_service_ferc1,electric_plant_purchased,1.0,electric,, -utility_plant_summary_ferc1,utility_plant_in_service_plant_purchased_or_sold,plant_in_service_ferc1,electric_plant_sold,-1.0,electric,, +utility_plant_summary_ferc1,utility_plant_in_service_experimental_plant_unclassified,plant_in_service_ferc1,experimental_electric_plant_unclassified,electric,in_service,,1.0 +utility_plant_summary_ferc1,utility_plant_in_service_plant_purchased_or_sold,plant_in_service_ferc1,electric_plant_purchased,electric,,,1.0 +utility_plant_summary_ferc1,utility_plant_in_service_plant_purchased_or_sold,plant_in_service_ferc1,electric_plant_sold,electric,,,-1.0 utility_plant_summary_ferc1,utility_plant_and_construction_work_in_progress,balance_sheet_assets_ferc1,utility_plant,,,, diff --git a/src/pudl/package_data/ferc1/xbrl_factoid_plant_function_tags.csv b/src/pudl/package_data/ferc1/xbrl_factoid_plant_function_tags.csv new file mode 100644 index 0000000000..fb0d4a1558 --- /dev/null +++ b/src/pudl/package_data/ferc1/xbrl_factoid_plant_function_tags.csv @@ -0,0 +1,29 @@ +table_name,xbrl_factoid,aggregatable_plant_function +plant_in_service_ferc1,intangible_plant,intangible +plant_in_service_ferc1,steam_production_plant,steam_production +plant_in_service_ferc1,nuclear_production_plant,nuclear_production +plant_in_service_ferc1,hydraulic_production_plant,hydraulic_production +plant_in_service_ferc1,other_production_plant,other_production +plant_in_service_ferc1,transmission_plant,transmission +plant_in_service_ferc1,distribution_plant,distribution +plant_in_service_ferc1,transmission_and_market_operation_plant_regional_transmission_and_market_operation_plant,regional_transmission_and_market_operation +plant_in_service_ferc1,general_plant,general +electric_operating_expenses_ferc1,steam_power_generation_operations_expense,steam_production +electric_operating_expenses_ferc1,steam_power_generation_maintenance_expense,steam_production +electric_operating_expenses_ferc1,nuclear_power_generation_operations_expense,nuclear_production +electric_operating_expenses_ferc1,nuclear_power_generation_maintenance_expense,nuclear_production +electric_operating_expenses_ferc1,hydraulic_power_generation_operations_expense,hydraulic_production +electric_operating_expenses_ferc1,hydraulic_power_generation_maintenance_expense,hydraulic_production +electric_operating_expenses_ferc1,other_power_generation_operations_expense,other_production +electric_operating_expenses_ferc1,other_power_generation_maintenance_expense,other_production +electric_operating_expenses_ferc1,other_power_supply_expense, +electric_operating_expenses_ferc1,transmission_operation_expense,transmission +electric_operating_expenses_ferc1,transmission_maintenance_expense_electric,transmission 
+electric_operating_expenses_ferc1,regional_market_operation_expense,regional_transmission_and_market_operation +electric_operating_expenses_ferc1,regional_market_maintenance_expense,regional_transmission_and_market_operation +electric_operating_expenses_ferc1,distribution_operation_expenses_electric,distribution +electric_operating_expenses_ferc1,distribution_maintenance_expense_electric,distribution +electric_operating_expenses_ferc1,customer_account_expenses, +electric_operating_expenses_ferc1,customer_service_and_information_expenses, +electric_operating_expenses_ferc1,sales_expenses, +electric_operating_expenses_ferc1,administrative_and_general_expenses,general diff --git a/src/pudl/package_data/ferc1/xbrl_factoid_plant_status_tags.csv b/src/pudl/package_data/ferc1/xbrl_factoid_plant_status_tags.csv new file mode 100644 index 0000000000..e2a92b956c --- /dev/null +++ b/src/pudl/package_data/ferc1/xbrl_factoid_plant_status_tags.csv @@ -0,0 +1,38 @@ +table_name,xbrl_factoid,aggregatable_plant_status +utility_plant_summary_ferc1,utility_plant_in_service_classified,in_service +utility_plant_summary_ferc1,utility_plant_in_service_property_under_capital_leases,in_service +utility_plant_summary_ferc1,utility_plant_in_service_plant_purchased_or_sold,in_service +utility_plant_summary_ferc1,utility_plant_in_service_completed_construction_not_classified,in_service +utility_plant_summary_ferc1,utility_plant_in_service_experimental_plant_unclassified,in_service +utility_plant_summary_ferc1,utility_plant_in_service_classified_and_unclassified,in_service +utility_plant_summary_ferc1,utility_plant_leased_to_others,leased +utility_plant_summary_ferc1,utility_plant_held_for_future_use,future +utility_plant_summary_ferc1,construction_work_in_progress,construction_work_in_progress +utility_plant_summary_ferc1,utility_plant_acquisition_adjustment,total +utility_plant_summary_ferc1,utility_plant_and_construction_work_in_progress,total +utility_plant_summary_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,total +utility_plant_summary_ferc1,utility_plant_net,total +utility_plant_summary_ferc1,depreciation_utility_plant_in_service,in_service +utility_plant_summary_ferc1,amortization_and_depletion_of_producing_natural_gas_land_and_land_rights_utility_plant_in_service,in_service +utility_plant_summary_ferc1,amortization_of_underground_storage_land_and_land_rights_utility_plant_in_service,in_service +utility_plant_summary_ferc1,amortization_of_other_utility_plant_utility_plant_in_service,in_service +utility_plant_summary_ferc1,depreciation_amortization_and_depletion_utility_plant_in_service,in_service +utility_plant_summary_ferc1,depreciation_utility_plant_leased_to_others,leased +utility_plant_summary_ferc1,amortization_and_depletion_utility_plant_leased_to_others,leased +utility_plant_summary_ferc1,depreciation_amortization_and_depletion_utility_plant_leased_to_others,leased +utility_plant_summary_ferc1,depreciation_utility_plant_held_for_future_use,future +utility_plant_summary_ferc1,amortization_utility_plant_held_for_future_use,future +utility_plant_summary_ferc1,depreciation_and_amortization_utility_plant_held_for_future_use,future +utility_plant_summary_ferc1,abandonment_of_leases,total +utility_plant_summary_ferc1,amortization_of_plant_acquisition_adjustment,total +utility_plant_summary_ferc1,utility_plant_in_service_classified_and_property_under_capital_leases,in_service 
+utility_plant_summary_ferc1,utility_plant_in_service_plant_purchased_or_sold_correction,in_service +utility_plant_summary_ferc1,utility_plant_in_service_experimental_plant_unclassified_correction,in_service +utility_plant_summary_ferc1,utility_plant_in_service_classified_and_unclassified_correction,in_service +utility_plant_summary_ferc1,utility_plant_and_construction_work_in_progress_correction,construction_work_in_progress +utility_plant_summary_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility_correction,total +utility_plant_summary_ferc1,utility_plant_net_correction,total +utility_plant_summary_ferc1,depreciation_utility_plant_in_service_correction,in_service +utility_plant_summary_ferc1,depreciation_amortization_and_depletion_utility_plant_leased_to_others_correction,leased +utility_plant_summary_ferc1,depreciation_and_amortization_utility_plant_held_for_future_use_correction,future +utility_plant_summary_ferc1,utility_plant_in_service_classified_and_property_under_capital_leases_correction,in_service diff --git a/src/pudl/package_data/ferc1/xbrl_factoid_rate_base_tags.csv b/src/pudl/package_data/ferc1/xbrl_factoid_rate_base_tags.csv index 705ec73e2f..0ec6f222a1 100644 --- a/src/pudl/package_data/ferc1/xbrl_factoid_rate_base_tags.csv +++ b/src/pudl/package_data/ferc1/xbrl_factoid_rate_base_tags.csv @@ -215,7 +215,6 @@ special_funds_(non_major_only),special_funds_(non_major_only),special_deposits,b (less)_reaquired_capital_stock,(less)_reaquired_capital_stock,reacquired_capital_stock,balance_sheet_liabilities_ferc1,no,,,, _noncorporate_proprietorship_(non_major_only),_noncorporate_proprietorship_(non_major_only),noncorporate_proprietorship,balance_sheet_liabilities_ferc1,no,,,, appropriated_retained_earnings_amort_reserve_federal,appropriated_retained_earnings_amort_reserve_federal,appropriated_retained_earnings_amortization_reserve_federal,retained_earnings_ferc1,no,,,, -depreciation,depreciation,accumulated_depreciation,electric_plant_depreciation_functional_ferc1,no,,future,, -depreciation,depreciation,accumulated_depreciation,electric_plant_depreciation_functional_ferc1,yes,,in_service,, -depreciation,depreciation,accumulated_depreciation,electric_plant_depreciation_functional_ferc1,yes,,leased,, -depreciation,depreciation,accumulated_depreciation,electric_plant_depreciation_functional_ferc1,partial,,total,, +depreciation,depreciation,accumulated_depreciation,electric_plant_depreciation_functional_ferc1,no,,,future, +depreciation,depreciation,accumulated_depreciation,electric_plant_depreciation_functional_ferc1,yes,,,in_service, +depreciation,depreciation,accumulated_depreciation,electric_plant_depreciation_functional_ferc1,yes,,,leased, diff --git a/src/pudl/package_data/glue/utility_id_ferc1.csv b/src/pudl/package_data/glue/utility_id_ferc1.csv index a0623757ce..463b84366d 100644 --- a/src/pudl/package_data/glue/utility_id_ferc1.csv +++ b/src/pudl/package_data/glue/utility_id_ferc1.csv @@ -420,4 +420,4 @@ utility_id_ferc1,utility_id_ferc1_xbrl,utility_id_ferc1_dbf 444,C011304, 445,C011745,447 446,C004679, -447,C011785, \ No newline at end of file +447,C011785, diff --git a/src/pudl/package_data/glue/utility_id_pudl.csv b/src/pudl/package_data/glue/utility_id_pudl.csv index d233fe5907..76156297a9 100644 --- a/src/pudl/package_data/glue/utility_id_pudl.csv +++ b/src/pudl/package_data/glue/utility_id_pudl.csv @@ -15941,4 +15941,4 @@ utility_id_pudl,utility_id_ferc1,utility_name_ferc1,utility_id_eia,utility_name_ 15983,,,65818,"Ables Springs 
Storage, LLC" 15984,,,65828,Santa Barbara County 15985,,,65834,DE Shaw Renewable Investments -15986,,,65850,"Hull Street Energy, LLC" \ No newline at end of file +15986,,,65850,"Hull Street Energy, LLC" diff --git a/src/pudl/settings.py b/src/pudl/settings.py index d432acd330..82c399a6df 100644 --- a/src/pudl/settings.py +++ b/src/pudl/settings.py @@ -2,19 +2,27 @@ import itertools import json from enum import Enum, unique -from typing import ClassVar +from typing import Any, ClassVar, Self import fsspec import pandas as pd import yaml -from dagster import Any, DagsterInvalidDefinitionError, Field -from pydantic import AnyHttpUrl, BaseSettings, root_validator, validator -from pydantic import BaseModel as PydanticBaseModel +from dagster import Field as DagsterField +from pydantic import ( + AnyHttpUrl, + BaseModel, + ConfigDict, + field_validator, + model_validator, +) +from pydantic_settings import BaseSettings import pudl import pudl.workspace.setup from pudl.metadata.classes import DataSource -from pudl.workspace.datastore import Datastore +from pudl.workspace.datastore import Datastore, ZenodoDoi + +logger = pudl.logging_helpers.get_logger(__name__) @unique @@ -28,17 +36,13 @@ class XbrlFormNumber(Enum): FORM714 = 714 -class BaseModel(PydanticBaseModel): +class FrozenBaseModel(BaseModel): """BaseModel with global configuration.""" - class Config: - """Pydantic config.""" - - allow_mutation = False - extra = "forbid" + model_config = ConfigDict(frozen=True, extra="forbid") -class GenericDatasetSettings(BaseModel): +class GenericDatasetSettings(FrozenBaseModel): """An abstract pydantic model for generic datasets. Each dataset must specify working partitions. A dataset can have an arbitrary number @@ -49,33 +53,40 @@ class GenericDatasetSettings(BaseModel): """ disabled: bool = False + data_source: ClassVar[DataSource] + + @model_validator(mode="after") + def validate_partitions(self: Self): + """Ensure that partitions and their values are valid. - @root_validator - def validate_partitions(cls, partitions): # noqa: N805 - """Validate the requested data partitions. + Checks that: + + * all partitions specified by the data source exist, + * partitions are not None + * only known to be working partition values are specified + * no duplicate partition values are specified - Check that all the partitions defined in the ``working_partitions`` of the - associated ``data_source`` (e.g. years or states) have been assigned in the - definition of the class, and that the requested values are a subset of the - allowable values defined by the ``data_source``. """ - for name, working_partitions in cls.data_source.working_partitions.items(): + for name, working_partitions in self.data_source.working_partitions.items(): try: - partition = partitions[name] + partition = getattr(self, name) except KeyError: - raise ValueError(f"{cls.__name__} is missing required '{name}' field.") + raise ValueError(f"{self.__name__} is missing required '{name}' field.") + + # Partition should never be None -- should get a default value set in + # the child classes based on the working partitions. 
+            if partition is None:
+                raise ValueError(
+                    f"In {type(self).__name__}, partition {name} is None."
+                )

-            # If partition is None, default to working_partitions
-            if not partitions[name]:
-                partition = working_partitions
+            if nonworking_partitions := list(set(partition) - set(working_partitions)):
+                raise ValueError(f"'{nonworking_partitions}' {name} are not available.")

-            partitions_not_working = list(set(partition) - set(working_partitions))
-            if partitions_not_working:
+            if len(partition) != len(set(partition)):
                 raise ValueError(
-                    f"'{partitions_not_working}' {name} are not available."
+                    f"Duplicate values found in partition {name}: {partition}"
                 )
-            partitions[name] = sorted(set(partition))
-        return partitions
+
+        return self

     @property
     def partitions(cls) -> list[None | dict[str, str]]:  # noqa: N805
@@ -150,7 +161,8 @@ class EpaCemsSettings(GenericDatasetSettings):
     years: list[int] = data_source.working_partitions["years"]
     states: list[str] = data_source.working_partitions["states"]

-    @validator("states")
+    @field_validator("states")
+    @classmethod
     def allow_all_keyword(cls, states):  # noqa: N805
         """Allow users to specify ['all'] to get all states."""
         if states == ["all"]:
@@ -202,7 +214,8 @@ class Eia860Settings(GenericDatasetSettings):
     years: list[int] = data_source.working_partitions["years"]
     eia860m: bool = True

-    @validator("eia860m")
+    @field_validator("eia860m")
+    @classmethod
     def check_eia860m_date(cls, eia860m: bool) -> bool:  # noqa: N805
         """Check 860m date-year is exactly one year after most recent working 860 year.

@@ -227,7 +240,7 @@ def check_eia860m_date(cls, eia860m: bool) -> bool:  # noqa: N805
         return eia860m


-class GlueSettings(BaseModel):
+class GlueSettings(FrozenBaseModel):
     """An immutable pydantic model to validate Glue settings.

     Args:
@@ -239,7 +252,7 @@ class GlueSettings(BaseModel):
     ferc1: bool = True


-class EiaSettings(BaseModel):
+class EiaSettings(FrozenBaseModel):
     """An immutable pydantic model to validate EIA datasets settings.

     Args:
@@ -248,29 +261,24 @@ class EiaSettings(BaseModel):
         eia923: Immutable pydantic model to validate eia923 settings.
     """

-    eia860: Eia860Settings = None
-    eia861: Eia861Settings = None
-    eia923: Eia923Settings = None
+    eia860: Eia860Settings | None = None
+    eia861: Eia861Settings | None = None
+    eia923: Eia923Settings | None = None

-    @root_validator(pre=True)
-    def default_load_all(cls, values):  # noqa: N805
-        """If no datasets are specified default to all.
-
-        Args:
-            values (Dict[str, BaseModel]): dataset settings.
-
-        Returns:
-            values (Dict[str, BaseModel]): dataset settings.
-        """
-        if not any(values.values()):
-            values["eia860"] = Eia860Settings()
-            values["eia861"] = Eia861Settings()
-            values["eia923"] = Eia923Settings()
+    @model_validator(mode="before")
+    @classmethod
+    def default_load_all(cls, data: dict[str, Any]) -> dict[str, Any]:
+        """If no datasets are specified default to all."""
+        if not any(data.values()):
+            data["eia860"] = Eia860Settings()
+            data["eia861"] = Eia861Settings()
+            data["eia923"] = Eia923Settings()

-        return values
+        return data

-    @root_validator
-    def check_eia_dependencies(cls, values):  # noqa: N805
+    @model_validator(mode="before")
+    @classmethod
+    def check_eia_dependencies(cls, data: dict[str, Any]) -> dict[str, Any]:
         """Make sure the dependencies between the eia datasets are satisfied.

         Dependencies:
@@ -282,20 +290,18 @@ def check_eia_dependencies(cls, values):  # noqa: N805
         Returns:
             values (Dict[str, BaseModel]): dataset settings.
""" - eia923 = values.get("eia923") - eia860 = values.get("eia860") - if not eia923 and eia860: - values["eia923"] = Eia923Settings(years=eia860.years) - - if eia923 and not eia860: - available_years = Eia860Settings() - values["eia860"] = Eia860Settings( - years=[year for year in eia923.years if year in available_years] + if not data.get("eia923") and data.get("eia860"): + data["eia923"] = Eia923Settings(years=data["eia860"].years) + + if data.get("eia923") and not data.get("eia860"): + available_years = Eia860Settings().years + data["eia860"] = Eia860Settings( + years=[year for year in data["eia923"].years if year in available_years] ) - return values + return data -class DatasetsSettings(BaseModel): +class DatasetsSettings(FrozenBaseModel): """An immutable pydantic model to validate PUDL Dataset settings. Args: @@ -305,33 +311,35 @@ class DatasetsSettings(BaseModel): epacems: Immutable pydantic model to validate epacems settings. """ - eia: EiaSettings = None - epacems: EpaCemsSettings = None - ferc1: Ferc1Settings = None - ferc714: Ferc714Settings = None - glue: GlueSettings = None + eia: EiaSettings | None = None + epacems: EpaCemsSettings | None = None + ferc1: Ferc1Settings | None = None + ferc714: Ferc714Settings | None = None + glue: GlueSettings | None = None - @root_validator(pre=True) - def default_load_all(cls, values): # noqa: N805 + @model_validator(mode="before") + @classmethod + def default_load_all(cls, data: dict[str, Any]) -> dict[str, Any]: """If no datasets are specified default to all. Args: - values (Dict[str, BaseModel]): dataset settings. + data: dataset settings inputs. Returns: - values (Dict[str, BaseModel]): dataset settings. + Validated dataset settings inputs. """ - if not any(values.values()): - values["eia"] = EiaSettings() - values["epacems"] = EpaCemsSettings() - values["ferc1"] = Ferc1Settings() - values["ferc714"] = Ferc714Settings() - values["glue"] = GlueSettings() + if not any(data.values()): + data["eia"] = EiaSettings() + data["epacems"] = EpaCemsSettings() + data["ferc1"] = Ferc1Settings() + data["ferc714"] = Ferc714Settings() + data["glue"] = GlueSettings() - return values + return data - @root_validator - def add_glue_settings(cls, values): # noqa: N805 + @model_validator(mode="before") + @classmethod + def add_glue_settings(cls, data: dict[str, Any]) -> dict[str, Any]: """Add glue settings if ferc1 and eia data are both requested. Args: @@ -340,17 +348,17 @@ def add_glue_settings(cls, values): # noqa: N805 Returns: values (Dict[str, BaseModel]): dataset settings. """ - ferc1 = bool(values.get("ferc1")) - eia = bool(values.get("eia")) + ferc1 = bool(data.get("ferc1")) + eia = bool(data.get("eia")) + if ferc1 and eia: + data["glue"] = GlueSettings(ferc1=ferc1, eia=eia) + return data - values["glue"] = GlueSettings(ferc1=ferc1, eia=eia) - return values - - def get_datasets(self): # noqa: N805 + def get_datasets(self: Self): """Gets dictionary of dataset settings.""" return vars(self) - def make_datasources_table(self, ds: Datastore) -> pd.DataFrame: + def make_datasources_table(self: Self, ds: Datastore) -> pd.DataFrame: """Compile a table of dataset information. 
There are three places we can look for information about a dataset: @@ -397,7 +405,7 @@ def make_datasources_table(self, ds: Datastore) -> pd.DataFrame: for dataset in datasets ], "doi": [ - _make_doi_clickable(ds.get_datapackage_descriptor(dataset).doi) + str(_zenodo_doi_to_url(ds.get_datapackage_descriptor(dataset).doi)) for dataset in datasets ], } @@ -419,8 +427,10 @@ def make_datasources_table(self, ds: Datastore) -> pd.DataFrame: ) ], "doi": [ - _make_doi_clickable( - ds.get_datapackage_descriptor("eia860m").doi + str( + _zenodo_doi_to_url( + ds.get_datapackage_descriptor("eia860m").doi + ) ) ], } @@ -454,7 +464,7 @@ class FercGenericXbrlToSqliteSettings(BaseSettings): disabled: if True, skip processing this dataset. """ - taxonomy: AnyHttpUrl + taxonomy: str years: list[int] disabled: bool = False @@ -471,7 +481,7 @@ class Ferc1XbrlToSqliteSettings(FercGenericXbrlToSqliteSettings): years: list[int] = [ year for year in data_source.working_partitions["years"] if year >= 2021 ] - taxonomy: AnyHttpUrl = "https://eCollection.ferc.gov/taxonomy/form1/2022-01-01/form/form1/form-1_2022-01-01.xsd" + taxonomy: str = "https://eCollection.ferc.gov/taxonomy/form1/2022-01-01/form/form1/form-1_2022-01-01.xsd" class Ferc2XbrlToSqliteSettings(FercGenericXbrlToSqliteSettings): @@ -485,7 +495,7 @@ class Ferc2XbrlToSqliteSettings(FercGenericXbrlToSqliteSettings): years: list[int] = [ year for year in data_source.working_partitions["years"] if year >= 2021 ] - taxonomy: AnyHttpUrl = "https://eCollection.ferc.gov/taxonomy/form2/2022-01-01/form/form2/form-2_2022-01-01.xsd" + taxonomy: str = "https://eCollection.ferc.gov/taxonomy/form2/2022-01-01/form/form2/form-2_2022-01-01.xsd" class Ferc2DbfToSqliteSettings(GenericDatasetSettings): @@ -532,7 +542,7 @@ class Ferc6XbrlToSqliteSettings(FercGenericXbrlToSqliteSettings): years: list[int] = [ year for year in data_source.working_partitions["years"] if year >= 2021 ] - taxonomy: AnyHttpUrl = "https://eCollection.ferc.gov/taxonomy/form6/2022-01-01/form/form6/form-6_2022-01-01.xsd" + taxonomy: str = "https://eCollection.ferc.gov/taxonomy/form6/2022-01-01/form/form6/form-6_2022-01-01.xsd" class Ferc60DbfToSqliteSettings(GenericDatasetSettings): @@ -563,7 +573,7 @@ class Ferc60XbrlToSqliteSettings(FercGenericXbrlToSqliteSettings): years: list[int] = [ year for year in data_source.working_partitions["years"] if year >= 2021 ] - taxonomy: AnyHttpUrl = "https://eCollection.ferc.gov/taxonomy/form60/2022-01-01/form/form60/form-60_2022-01-01.xsd" + taxonomy: str = "https://eCollection.ferc.gov/taxonomy/form60/2022-01-01/form/form60/form-60_2022-01-01.xsd" class Ferc714XbrlToSqliteSettings(FercGenericXbrlToSqliteSettings): @@ -574,8 +584,8 @@ class Ferc714XbrlToSqliteSettings(FercGenericXbrlToSqliteSettings): """ data_source: ClassVar[DataSource] = DataSource.from_id("ferc714") - years: list[int] = [2021] - taxonomy: AnyHttpUrl = "https://eCollection.ferc.gov/taxonomy/form714/2022-01-01/form/form714/form-714_2022-01-01.xsd" + years: list[int] = [2021, 2022] + taxonomy: str = "https://eCollection.ferc.gov/taxonomy/form714/2022-01-01/form/form714/form-714_2022-01-01.xsd" class FercToSqliteSettings(BaseSettings): @@ -584,42 +594,36 @@ class FercToSqliteSettings(BaseSettings): Args: ferc1_dbf_to_sqlite_settings: Settings for converting FERC 1 DBF data to SQLite. ferc1_xbrl_to_sqlite_settings: Settings for converting FERC 1 XBRL data to - SQLite. + SQLite. other_xbrl_forms: List of non-FERC1 forms to convert from XBRL to SQLite. 
""" - ferc1_dbf_to_sqlite_settings: Ferc1DbfToSqliteSettings = None - ferc1_xbrl_to_sqlite_settings: Ferc1XbrlToSqliteSettings = None - ferc2_dbf_to_sqlite_settings: Ferc2DbfToSqliteSettings = None - ferc2_xbrl_to_sqlite_settings: Ferc2XbrlToSqliteSettings = None - ferc6_dbf_to_sqlite_settings: Ferc6DbfToSqliteSettings = None - ferc6_xbrl_to_sqlite_settings: Ferc6XbrlToSqliteSettings = None - ferc60_dbf_to_sqlite_settings: Ferc60DbfToSqliteSettings = None - ferc60_xbrl_to_sqlite_settings: Ferc60XbrlToSqliteSettings = None - ferc714_xbrl_to_sqlite_settings: Ferc714XbrlToSqliteSettings = None - - @root_validator(pre=True) - def default_load_all(cls, values): # noqa: N805 - """If no datasets are specified default to all. - - Args: - values (Dict[str, BaseModel]): dataset settings. - - Returns: - values (Dict[str, BaseModel]): dataset settings. - """ - if not any(values.values()): - values["ferc1_dbf_to_sqlite_settings"] = Ferc1DbfToSqliteSettings() - values["ferc1_xbrl_to_sqlite_settings"] = Ferc1XbrlToSqliteSettings() - values["ferc2_dbf_to_sqlite_settings"] = Ferc2DbfToSqliteSettings() - values["ferc2_xbrl_to_sqlite_settings"] = Ferc2XbrlToSqliteSettings() - values["ferc6_dbf_to_sqlite_settings"] = Ferc6DbfToSqliteSettings() - values["ferc6_xbrl_to_sqlite_settings"] = Ferc6XbrlToSqliteSettings() - values["ferc60_dbf_to_sqlite_settings"] = Ferc60DbfToSqliteSettings() - values["ferc60_xbrl_to_sqlite_settings"] = Ferc60XbrlToSqliteSettings() - values["ferc714_xbrl_to_sqlite_settings"] = Ferc714XbrlToSqliteSettings() - - return values + ferc1_dbf_to_sqlite_settings: Ferc1DbfToSqliteSettings | None = None + ferc1_xbrl_to_sqlite_settings: Ferc1XbrlToSqliteSettings | None = None + ferc2_dbf_to_sqlite_settings: Ferc2DbfToSqliteSettings | None = None + ferc2_xbrl_to_sqlite_settings: Ferc2XbrlToSqliteSettings | None = None + ferc6_dbf_to_sqlite_settings: Ferc6DbfToSqliteSettings | None = None + ferc6_xbrl_to_sqlite_settings: Ferc6XbrlToSqliteSettings | None = None + ferc60_dbf_to_sqlite_settings: Ferc60DbfToSqliteSettings | None = None + ferc60_xbrl_to_sqlite_settings: Ferc60XbrlToSqliteSettings | None = None + ferc714_xbrl_to_sqlite_settings: Ferc714XbrlToSqliteSettings | None = None + + @model_validator(mode="before") + @classmethod + def default_load_all(cls, data: dict[str, Any]) -> dict[str, Any]: + """If no datasets are specified default to all.""" + if not any(data.values()): + data["ferc1_dbf_to_sqlite_settings"] = Ferc1DbfToSqliteSettings() + data["ferc1_xbrl_to_sqlite_settings"] = Ferc1XbrlToSqliteSettings() + data["ferc2_dbf_to_sqlite_settings"] = Ferc2DbfToSqliteSettings() + data["ferc2_xbrl_to_sqlite_settings"] = Ferc2XbrlToSqliteSettings() + data["ferc6_dbf_to_sqlite_settings"] = Ferc6DbfToSqliteSettings() + data["ferc6_xbrl_to_sqlite_settings"] = Ferc6XbrlToSqliteSettings() + data["ferc60_dbf_to_sqlite_settings"] = Ferc60DbfToSqliteSettings() + data["ferc60_xbrl_to_sqlite_settings"] = Ferc60XbrlToSqliteSettings() + data["ferc714_xbrl_to_sqlite_settings"] = Ferc714XbrlToSqliteSettings() + + return data def get_xbrl_dataset_settings( self, form_number: XbrlFormNumber @@ -648,13 +652,13 @@ def get_xbrl_dataset_settings( class EtlSettings(BaseSettings): """Main settings validation class.""" - ferc_to_sqlite_settings: FercToSqliteSettings = None - datasets: DatasetsSettings = None + ferc_to_sqlite_settings: FercToSqliteSettings | None = None + datasets: DatasetsSettings | None = None - name: str = None - title: str = None - description: str = None - version: str = None + name: str | None = 
None
+    title: str | None = None
+    description: str | None = None
+    version: str | None = None

     # This is list of fsspec compatible paths to publish the output datasets to.
     publish_destinations: list[str] = []
@@ -671,44 +675,42 @@ def from_yaml(cls, path: str) -> "EtlSettings":
         """
         with fsspec.open(path) as f:
             yaml_file = yaml.safe_load(f)
-        return cls.parse_obj(yaml_file)
+        return cls.model_validate(yaml_file)


-def _convert_settings_to_dagster_config(d: dict) -> None:
-    """Convert dictionary of dataset settings to dagster config.
+def _convert_settings_to_dagster_config(settings_dict: dict[str, Any]) -> None:
+    """Recursively convert a dictionary of dataset settings to dagster config in place.

-    For each partition parameter in a GenericDatasetSettings subclass, create a Noneable
-    Dagster field with a default value of None. The GenericDatasetSettings
-    subclasses will default to include all working paritions if the partition value
-    is None. Get the value type so dagster can do some basic type checking in the UI.
+    For each partition parameter in a :class:`GenericDatasetSettings` subclass, create a
+    corresponding :class:`DagsterField`. The :class:`GenericDatasetSettings` subclasses
+    default to including all working partitions if the partition value is None. Get the
+    value type so dagster can do some basic type checking in the UI.

     Args:
-        d: dictionary of datasources and their parameters.
+        settings_dict: dictionary of datasources and their parameters.
     """
-    for k, v in d.items():
-        if isinstance(v, dict):
-            _convert_settings_to_dagster_config(v)
+    for key, value in settings_dict.items():
+        if isinstance(value, dict):
+            _convert_settings_to_dagster_config(value)
         else:
-            try:
-                d[k] = Field(type(v), default_value=v)
-            except DagsterInvalidDefinitionError:
-                # Dagster config accepts a valid dagster types.
-                # Most of our settings object properties are valid types
-                # except for fields like taxonomy which are the AnyHttpUrl type.
-                d[k] = Field(Any, default_value=v)
+            settings_dict[key] = DagsterField(type(value), default_value=value)


-def create_dagster_config(settings: BaseModel) -> dict:
-    """Create a dictionary of dagster config for the DatasetsSettings Class.
+def create_dagster_config(settings: GenericDatasetSettings) -> dict[str, DagsterField]:
+    """Create a dictionary of dagster config out of a :class:`GenericDatasetSettings`.
+
+    Args:
+        settings: A dataset settings object, subclassed from
+            :class:`GenericDatasetSettings`.

     Returns:
-        A dictionary of dagster configuration.
+        A dictionary of :class:`DagsterField` objects.
""" - ds = settings.dict() - _convert_settings_to_dagster_config(ds) - return ds + settings_dict = settings.model_dump() + _convert_settings_to_dagster_config(settings_dict) + return settings_dict -def _make_doi_clickable(link): - """Make a clickable DOI.""" - return f"https://doi.org/{link}" +def _zenodo_doi_to_url(doi: ZenodoDoi) -> AnyHttpUrl: + """Create a DOI URL out o a Zenodo DOI.""" + return AnyHttpUrl(f"https://doi.org/{doi}") diff --git a/src/pudl/transform/classes.py b/src/pudl/transform/classes.py index f32071696e..0f41e609b4 100644 --- a/src/pudl/transform/classes.py +++ b/src/pudl/transform/classes.py @@ -69,11 +69,18 @@ from collections.abc import Callable from functools import wraps from itertools import combinations -from typing import Any, Protocol +from typing import Annotated, Any, Protocol, Self import numpy as np import pandas as pd -from pydantic import BaseModel, conset, root_validator, validator +from pydantic import ( + BaseModel, + ConfigDict, + Field, + ValidationInfo, + field_validator, + model_validator, +) import pudl.logging_helpers import pudl.transform.params.ferc1 @@ -92,11 +99,7 @@ class TransformParams(BaseModel): when applied by their associated function. """ - class Config: - """Prevent parameters from changing part way through.""" - - allow_mutation = False - extra = "forbid" + model_config = ConfigDict(frozen=True, extra="forbid") class MultiColumnTransformParams(TransformParams): @@ -118,16 +121,16 @@ class MultiColumnTransformParams(TransformParams): https://pydantic-docs.helpmanual.io/blog/pydantic-v2/#validation-without-a-model """ - @root_validator - def single_param_type(cls, params): # noqa: N805 + @model_validator(mode="after") + def single_param_type(self: Self, info: ValidationInfo): """Check that all TransformParams in the dictionary are of the same type.""" - param_types = {type(params[col]) for col in params} + param_types = {type(info.data[col]) for col in info.data} if len(param_types) > 1: raise ValueError( "Found multiple parameter types in multi-column transform params: " f"{param_types}" ) - return params + return self ##################################################################################### @@ -424,7 +427,8 @@ class StringCategories(TransformParams): :func:`categorize_strings` to see how it is used. """ - @validator("categories") + @field_validator("categories") + @classmethod def categories_are_disjoint(cls, v): """Ensure that each string to be categorized only appears in one category.""" for cat1, cat2 in combinations(v, 2): @@ -436,7 +440,8 @@ def categories_are_disjoint(cls, v): ) return v - @validator("categories") + @field_validator("categories") + @classmethod def categories_are_idempotent(cls, v): """Ensure that every category contains the string it will map to. @@ -503,17 +508,17 @@ class UnitConversion(TransformParams): from_unit: str = "" # If it's the empty string, no renaming will happen. to_unit: str = "" # If it's the empty string, no renaming will happen. - @root_validator - def both_or_neither_units_are_none(cls, params): + @model_validator(mode="after") + def both_or_neither_units_are_none(self: Self): """Ensure that either both or neither of the units strings are None.""" - if (params["from_unit"] == "" and params["to_unit"] != "") or ( - params["from_unit"] != "" and params["to_unit"] == "" + if (self.from_unit == "" and self.to_unit != "") or ( + self.from_unit != "" and self.to_unit == "" ): raise ValueError( "Either both or neither of from_unit and to_unit must be non-empty. 
" - f"Got {params['from_unit']=} {params['to_unit']=}." + f"Got {self.from_unit=} {self.to_unit=}." ) - return params + return self def inverse(self) -> "UnitConversion": """Construct a :class:`UnitConversion` that is the inverse of self. @@ -572,12 +577,13 @@ class ValidRange(TransformParams): lower_bound: float = -np.inf upper_bound: float = np.inf - @validator("upper_bound") - def upper_bound_gte_lower_bound(cls, v, values): + @field_validator("upper_bound") + @classmethod + def upper_bound_gte_lower_bound(cls, upper_bound: float, info: ValidationInfo): """Require upper bound to be greater than or equal to lower bound.""" - if values["lower_bound"] > v: + if info.data["lower_bound"] > upper_bound: raise ValueError("upper_bound must be greater than or equal to lower_bound") - return v + return upper_bound def nullify_outliers(col: pd.Series, params: ValidRange) -> pd.Series: @@ -622,7 +628,8 @@ class UnitCorrections(TransformParams): unit_conversions: list[UnitConversion] """A list of unit conversions to use to identify errors and correct them.""" - @validator("unit_conversions") + @field_validator("unit_conversions") + @classmethod def no_column_rename(cls, params: list[UnitConversion]) -> list[UnitConversion]: """Ensure that the unit conversions used in corrections don't rename the column. @@ -636,8 +643,8 @@ def no_column_rename(cls, params: list[UnitConversion]) -> list[UnitConversion]: ) return new_conversions - @root_validator - def distinct_domains(cls, params): + @model_validator(mode="after") + def distinct_domains(self: Self): """Verify that all unit conversions map distinct domains to the valid range. If the domains being mapped to the valid range overlap, then it is ambiguous @@ -654,12 +661,12 @@ def distinct_domains(cls, params): corrected to be 2. 
""" input_vals = pd.Series( - [params["valid_range"].lower_bound, params["valid_range"].upper_bound], + [self.valid_range.lower_bound, self.valid_range.upper_bound], name="dude", ) # We need to make sure that the unit conversion doesn't map the valid range # onto itself either, so add an additional conversion that does nothing: - uc_combos = combinations(params["unit_conversions"] + [UnitConversion()], 2) + uc_combos = combinations(self.unit_conversions + [UnitConversion()], 2) for uc1, uc2 in uc_combos: out1 = convert_units(input_vals, uc1.inverse()) out2 = convert_units(input_vals, uc2.inverse()) @@ -667,11 +674,11 @@ def distinct_domains(cls, params): raise ValueError( "The following pair of unit corrections are incompatible due to " "overlapping domains.\n" - f"{params['valid_range']=}\n" + f"{self.valid_range=}\n" f"{uc1=}\n" f"{uc2=}\n" ) - return params + return self def correct_units(df: pd.DataFrame, params: UnitCorrections) -> pd.DataFrame: @@ -734,7 +741,7 @@ def correct_units(df: pd.DataFrame, params: UnitCorrections) -> pd.DataFrame: class InvalidRows(TransformParams): """Pameters that identify invalid rows to drop.""" - invalid_values: conset(Any, min_items=1) | None = None + invalid_values: Annotated[set[Any], Field(min_length=1)] | None = None """A list of values that should be considered invalid in the selected columns.""" required_valid_cols: list[str] | None = None @@ -753,16 +760,16 @@ class InvalidRows(TransformParams): regex: str | None = None """A regular expression to use as the ``regex`` argument to :meth:`pd.filter`.""" - @root_validator - def one_filter_argument(cls, values): + @model_validator(mode="after") + def one_filter_argument(self: Self): """Validate that only one argument is specified for :meth:`pd.filter`.""" num_args = sum( int(bool(val)) for val in [ - values["required_valid_cols"], - values["allowed_invalid_cols"], - values["like"], - values["regex"], + self.required_valid_cols, + self.allowed_invalid_cols, + self.like, + self.regex, ] ) if num_args > 1: @@ -771,7 +778,7 @@ def one_filter_argument(cls, values): f"{num_args} were found." ) - return values + return self def drop_invalid_rows(df: pd.DataFrame, params: InvalidRows) -> pd.DataFrame: @@ -901,8 +908,8 @@ def spot_fix_values(df: pd.DataFrame, params: SpotFixes) -> pd.DataFrame: ) # Convert input datatypes to match corresponding df columns. - for x in spot_fixes_df.columns: - spot_fixes_df[x] = spot_fixes_df[x].astype(df[x].dtypes.name) + for col in spot_fixes_df.columns: + spot_fixes_df[col] = spot_fixes_df[col].astype(df[col].dtypes.name) spot_fixes_df = spot_fixes_df.set_index(params.idx_cols) df = df.set_index(params.idx_cols) @@ -910,7 +917,8 @@ def spot_fix_values(df: pd.DataFrame, params: SpotFixes) -> pd.DataFrame: if params.expect_unique is True and not df.index.is_unique: cols_list = ", ".join(params.idx_cols) raise ValueError( - f"This spot fix expects a unique set of idx_col, but the idx_cols provided are not uniquely identifying: {cols_list}." + "This spot fix expects a unique set of idx_col, but the idx_cols provided " + f"are not uniquely identifying: {cols_list}." ) # Only keep spot fix values found in the dataframe index. 
diff --git a/src/pudl/transform/ferc1.py b/src/pudl/transform/ferc1.py
index 1c497fd370..b8b9843974 100644
--- a/src/pudl/transform/ferc1.py
+++ b/src/pudl/transform/ferc1.py
@@ -15,14 +15,14 @@
 from abc import abstractmethod
 from collections import namedtuple
 from collections.abc import Mapping
-from typing import Any, Literal, Self
+from typing import Annotated, Any, Literal, Self

 import numpy as np
 import pandas as pd
 import sqlalchemy as sa
 from dagster import AssetIn, AssetsDefinition, asset
 from pandas.core.groupby import DataFrameGroupBy
-from pydantic import BaseModel, confloat, validator
+from pydantic import BaseModel, Field, field_validator

 import pudl
 from pudl.analysis.classify_plants_ferc1 import (
@@ -205,13 +205,13 @@ def rename_dicts_xbrl(self):
 class WideToTidy(TransformParams):
     """Parameters for converting a wide table to a tidy table with value types."""

-    idx_cols: list[str] | None
+    idx_cols: list[str] | None = None
     """List of column names to treat as the table index."""

     stacked_column_name: str | None = None
     """Name of column that will contain the stacked categories."""

-    value_types: list[str] | None
+    value_types: list[str] | None = None
     """List of names of value types that will end up being the column names.

     Some of the FERC tables have multiple data types spread across many different
@@ -643,7 +643,8 @@ class CombineAxisColumnsXbrl(TransformParams):
     new_axis_column_name: str | None = None
     """The name of the combined axis column -- must end with the suffix ``_axis``!."""

-    @validator("new_axis_column_name")
+    @field_validator("new_axis_column_name")
+    @classmethod
     def doesnt_end_with_axis(cls, v):
         """Ensure that new axis column ends in _axis."""
         if v is not None and not v.endswith("_axis"):
@@ -724,10 +725,10 @@ def combine_axis_columns_xbrl(
 class IsCloseTolerance(TransformParams):
     """Info for testing a particular check."""

-    isclose_rtol: confloat(ge=0.0) = 1e-5
+    isclose_rtol: Annotated[float, Field(ge=0.0)] = 1e-5
     """Relative tolerance to use in :func:`np.isclose` for determining equality."""

-    isclose_atol: confloat(ge=0.0, le=0.01) = 1e-8
+    isclose_atol: Annotated[float, Field(ge=0.0, le=0.01)] = 1e-8
     """Absolute tolerance to use in :func:`np.isclose` for determining equality."""


@@ -744,12 +745,12 @@ class CalculationIsCloseTolerance(TransformParams):
 class MetricTolerances(TransformParams):
-    """Tolerances for all data checks to be preformed within a grouped df."""
+    """Tolerances for all data checks to be performed within a grouped df."""

-    error_frequency: confloat(ge=0.0, le=1.0) = 0.01
-    relative_error_magnitude: confloat(ge=0.0) = 0.02
-    null_calculated_value_frequency: confloat(ge=0.0, le=1.0) = 0.7
+    error_frequency: Annotated[float, Field(ge=0.0, le=1.0)] = 0.01
+    relative_error_magnitude: Annotated[float, Field(ge=0.0)] = 0.02
+    null_calculated_value_frequency: Annotated[float, Field(ge=0.0, le=1.0)] = 0.7
     """Fraction of records with non-null reported values and null calculated values."""
-    absolute_error_magnitude: confloat(ge=0.0) = np.inf
-    null_reported_value_frequency: confloat(ge=0.0, le=1.0) = 1.0
+    absolute_error_magnitude: Annotated[float, Field(ge=0.0)] = np.inf
+    null_reported_value_frequency: Annotated[float, Field(ge=0.0, le=1.0)] = 1.0
     # ooof this one is just bad


@@ -820,22 +821,25 @@ class GroupMetricChecks(TransformParams):
     group_metric_tolerances: GroupMetricTolerances = GroupMetricTolerances()
     is_close_tolerance: CalculationIsCloseTolerance = CalculationIsCloseTolerance()

-    # @root_validator
-    # def grouped_tol_ge_ungrouped_tol(cls, values):
-    #     """Grouped tolerance should always be greater than or equal
to ungrouped.""" - # group_metric_tolerances = values["group_metric_tolerances"] - # groups_to_check = values["groups_to_check"] - # for group in groups_to_check: - # metric_tolerances = group_metric_tolerances.dict().get(group) - # for metric_name, tolerance in metric_tolerances.items(): - # ungrouped_tolerance = group_metric_tolerances.dict()["ungrouped"].get( - # metric_name - # ) - # if tolerance < ungrouped_tolerance: - # raise AssertionError( - # f"In {group=}, {tolerance=} for {metric_name} should be greater than {ungrouped_tolerance=}." - # ) - # return values + # TODO: The mechanics of this validation are a pain, given the bajillion combos + # of tolerances we have in the matrix of checks. It works, but actually specifying + # all of the relative values is not currently ergonomic, so it is disabled for the + # moment. + # @model_validator(mode="after") + def grouped_tol_ge_ungrouped_tol(self: Self): + """Grouped tolerance should always be greater than or equal to ungrouped.""" + for group in self.groups_to_check: + metric_tolerances = self.group_metric_tolerances.model_dump().get(group) + for metric_name, tolerance in metric_tolerances.items(): + ungrouped_tolerance = self.group_metric_tolerances.model_dump()[ + "ungrouped" + ].get(metric_name) + if tolerance < ungrouped_tolerance: + raise AssertionError( + f"In {group=}, {tolerance=} for {metric_name} should be " + f"greater than {ungrouped_tolerance=}." + ) + return self class ReconcileTableCalculations(TransformParams): @@ -1105,18 +1109,23 @@ def calculate_values_from_components( "utility_id_ferc1", "report_year", ] - calc_df = ( - pd.merge( - calculation_components, - data, - validate="one_to_many", - on=calc_idx, - ) - # apply the weight from the calc to convey the sign before summing. - .assign(calculated_value=lambda x: x[value_col] * x.weight) - .groupby(gby_parent, as_index=False, dropna=False)[["calculated_value"]] - .sum(min_count=1) - ) + try: + calc_df = ( + pd.merge( + calculation_components, + data, + validate="one_to_many", + on=calc_idx, + ) + # apply the weight from the calc to convey the sign before summing. + .assign(calculated_value=lambda x: x[value_col] * x.weight) + .groupby(gby_parent, as_index=False, dropna=False)[["calculated_value"]] + .sum(min_count=1) + ) + except pd.errors.MergeError: # Make debugging easier. + raise pd.errors.MergeError( + f"Merge failed, duplicated merge keys in left dataset: \n{calculation_components[calculation_components.duplicated(calc_idx)]}" + ) # remove the _parent suffix so we can merge these calculated values back onto # the data using the original pks calc_df.columns = calc_df.columns.str.removesuffix("_parent") @@ -1174,14 +1183,16 @@ def check_calculation_metrics_by_group( for ( metric_name, metric_tolerance, - ) in group_metric_checks.group_metric_tolerances.dict()[group_name].items(): + ) in group_metric_checks.group_metric_tolerances.model_dump()[ + group_name + ].items(): if metric_name in group_metric_checks.metrics_to_check: # this feels icky. the param name for the metrics are all snake_case while # the metric classes are all TitleCase. 
So we convert to TitleCase title_case_test = metric_name.title().replace("_", "") group_metric_checker = globals()[title_case_test]( by=group_name, - is_close_tolerance=group_metric_checks.is_close_tolerance.dict()[ + is_close_tolerance=group_metric_checks.is_close_tolerance.model_dump()[ metric_name ], metric_tolerance=metric_tolerance, @@ -1532,7 +1543,7 @@ def aligned_dbf_table_names(self) -> list[str]: # FERC 1 transform helper functions. Probably to be integrated into a class # below as methods or moved to a different module once it's clear where they belong. ################################################################################ -def get_ferc1_dbf_rows_to_map(ferc1_engine: sa.engine.Engine) -> pd.DataFrame: +def get_ferc1_dbf_rows_to_map(ferc1_engine: sa.Engine) -> pd.DataFrame: """Identify DBF rows that need to be mapped to XBRL columns. Select all records in the ``f1_row_lit_tbl`` where the row literal associated with a @@ -1553,7 +1564,7 @@ def get_ferc1_dbf_rows_to_map(ferc1_engine: sa.engine.Engine) -> pd.DataFrame: return row_lit.loc[row_lit.changed, idx_cols + data_cols] -def update_dbf_to_xbrl_map(ferc1_engine: sa.engine.Engine) -> pd.DataFrame: +def update_dbf_to_xbrl_map(ferc1_engine: sa.Engine) -> pd.DataFrame: """Regenerate the FERC 1 DBF+XBRL glue while retaining existing mappings. Reads all rows that need to be mapped out of the ``f1_row_lit_tbl`` and appends @@ -2268,6 +2279,7 @@ def process_xbrl_metadata_calculations( .drop_duplicates(keep="first") .pipe(self.add_calculation_corrections) ) + # this is really a xbrl_factoid-level flag, but we need it while using this # calc components. calc_comps["is_within_table_calc"] = ( @@ -2300,7 +2312,7 @@ def process_xbrl_metadata_calculations( "Duplicates found in the calculation components where none were ." f"expected {dupes}" ) - return calc_comps + return calc_comps.convert_dtypes() @cache_df(key="merge_xbrl_metadata") def merge_xbrl_metadata( @@ -3353,18 +3365,22 @@ def process_xbrl_metadata( naming conventions...). We use the same rename dictionary, but as an argument to :meth:`pd.Series.replace` instead of :meth:`pd.DataFrame.rename`. """ - tbl_meta = super().process_xbrl_metadata( - xbrl_metadata_converted, xbrl_calculations + tbl_meta = ( + super() + .process_xbrl_metadata(xbrl_metadata_converted, xbrl_calculations) + .assign(utility_type="electric", plant_status="in_service") ) # Set pseudo-account numbers for rows that split or combine FERC accounts, but # which are not calculated values. tbl_meta.loc[ - tbl_meta.xbrl_factoid == "electric_plant_purchased", "ferc_account" - ] = "102_purchased" + tbl_meta.xbrl_factoid == "electric_plant_purchased", + ["ferc_account", "plant_status"], + ] = ["102_purchased", pd.NA] tbl_meta.loc[ - tbl_meta.xbrl_factoid == "electric_plant_sold", "ferc_account" - ] = "102_sold" + tbl_meta.xbrl_factoid == "electric_plant_sold", + ["ferc_account", "plant_status"], + ] = ["102_sold", pd.NA] tbl_meta.loc[ tbl_meta.xbrl_factoid == "electric_plant_in_service_and_completed_construction_not_classified_electric", @@ -3493,7 +3509,7 @@ def transform_main(self, df: pd.DataFrame) -> pd.DataFrame: """The main table-specific transformations, affecting contents not structure. Annotates and alters data based on information from the XBRL taxonomy metadata. - Also assigns utility type for use in table explosions. + Also assigns utility type, plant status & function for use in table explosions. Make all electric_plant_sold balances positive. 
""" df = super().transform_main(df).pipe(self.apply_sign_conventions) @@ -3508,7 +3524,14 @@ def transform_main(self, df: pd.DataFrame) -> pd.DataFrame: logger.info( f"{self.table_id.value}: Converted {len(df[neg_values])} negative values to positive." ) - return df.assign(utility_type="electric") + # Assign plant status and utility type + df = df.assign(utility_type="electric", plant_status="in_service") + df.loc[ + df.ferc_account_label.isin( + ["electric_plant_sold", "electric_plant_purchased"] + ) + ].plant_status = pd.NA # With two exceptions + return df class PlantsSmallFerc1TableTransformer(Ferc1AbstractTableTransformer): @@ -4748,6 +4771,23 @@ class BalanceSheetLiabilitiesFerc1TableTransformer(Ferc1AbstractTableTransformer table_id: TableIdFerc1 = TableIdFerc1.BALANCE_SHEET_LIABILITIES has_unique_record_ids: bool = False + @cache_df("process_xbrl_metadata") + def process_xbrl_metadata( + self: Self, + xbrl_metadata_converted: pd.DataFrame, + xbrl_calculations: pd.DataFrame, + ) -> pd.DataFrame: + """Transform the metadata to reflect the transformed data. + + Beyond the standard :meth:`Ferc1AbstractTableTransformer.process_xbrl_metadata` + processing, assign utility type. + """ + return ( + super() + .process_xbrl_metadata(xbrl_metadata_converted, xbrl_calculations) + .assign(utility_type="total") + ) + @cache_df(key="main") def transform_main(self: Self, df: pd.DataFrame) -> pd.DataFrame: """Duplicate data that appears in multiple distinct calculations. @@ -4771,7 +4811,7 @@ def transform_main(self: Self, df: pd.DataFrame) -> pd.DataFrame: .assign(liability_type=lambda x: "less_" + x.liability_type) ) - return pd.concat([df, new_data]) + return pd.concat([df, new_data]).assign(utility_type="total") def convert_xbrl_metadata_json_to_df( self: Self, @@ -4825,6 +4865,23 @@ class BalanceSheetAssetsFerc1TableTransformer(Ferc1AbstractTableTransformer): table_id: TableIdFerc1 = TableIdFerc1.BALANCE_SHEET_ASSETS_FERC1 has_unique_record_ids: bool = False + @cache_df("process_xbrl_metadata") + def process_xbrl_metadata( + self: Self, + xbrl_metadata_converted: pd.DataFrame, + xbrl_calculations: pd.DataFrame, + ) -> pd.DataFrame: + """Transform the metadata to reflect the transformed data. + + Beyond the standard :meth:`Ferc1AbstractTableTransformer.process_xbrl_metadata` + processing, assign utility type. + """ + return ( + super() + .process_xbrl_metadata(xbrl_metadata_converted, xbrl_calculations) + .assign(utility_type="total") + ) + @cache_df(key="main") def transform_main(self: Self, df: pd.DataFrame) -> pd.DataFrame: """Duplicate data that appears in multiple distinct calculations. @@ -4897,7 +4954,9 @@ def convert_xbrl_metadata_json_to_df( ] ] new_facts = pd.DataFrame(facts_to_add).convert_dtypes() - return pd.concat([tbl_meta, new_facts, duplicated_facts]) + return pd.concat([tbl_meta, new_facts, duplicated_facts]).assign( + utility_type="total" + ) class IncomeStatementFerc1TableTransformer(Ferc1AbstractTableTransformer): @@ -5064,6 +5123,23 @@ def process_dbf(self, raw_dbf: pd.DataFrame) -> pd.DataFrame: ) return processed_dbf + @cache_df("process_xbrl_metadata") + def process_xbrl_metadata( + self: Self, + xbrl_metadata_converted: pd.DataFrame, + xbrl_calculations: pd.DataFrame, + ) -> pd.DataFrame: + """Transform the metadata to reflect the transformed data. + + Beyond the standard :meth:`Ferc1AbstractTableTransformer.process_xbrl_metadata` + processing, assign utility type. 
+ """ + return ( + super() + .process_xbrl_metadata(xbrl_metadata_converted, xbrl_calculations) + .assign(utility_type="total") + ) + @cache_df("main") def transform_main(self, df): """Add `_previous_year` factoids after standard transform_main. @@ -5074,7 +5150,7 @@ def transform_main(self, df): enable access to DBF data to fill this in as well. """ df = super().transform_main(df).pipe(self.add_previous_year_factoid) - return df + return df.assign(utility_type="total") def transform_end(self, df: pd.DataFrame) -> pd.DataFrame: """Check ``_previous_year`` factoids for consistency after the transformation is done.""" @@ -5377,7 +5453,11 @@ def process_xbrl_metadata( Beyond the standard :meth:`Ferc1AbstractTableTransformer.process_xbrl_metadata` processing, add FERC account values for a few known values. """ - meta = super().process_xbrl_metadata(xbrl_metadata_converted, xbrl_calculations) + meta = ( + super() + .process_xbrl_metadata(xbrl_metadata_converted, xbrl_calculations) + .assign(utility_type="electric") + ) # logger.info(meta) meta.loc[ meta.xbrl_factoid == "depreciation_expense", @@ -5401,7 +5481,6 @@ def process_xbrl_metadata( def transform_main(self, df): """After standard transform_main, assign utility type as electric.""" df = super().transform_main(df).assign(utility_type="electric") - # df["plant_function"] = df["plant_function"].replace("total", "electric") return df @@ -5562,6 +5641,20 @@ def transform_main(self, df: pd.DataFrame) -> pd.DataFrame: df = df.assign(depreciation_type="accumulated_depreciation").pipe( super().transform_main ) + # convert this **one** utility's depreciation $$ from negative -> + + # this was found through checking the inter-table calculations in the explosion + # process. The one factoid in this table is linked with + # depreciation_utility_plant_in_service in the utility_plant_summary_ferc1 table. + # the values in both tables are almost always postive. Not always & there are + # some logical reasons why depreciation can sometimes be negative. Nonetheless, + # for this one utility, all of its values in utility_plant_summary_ferc1 are + # postive while nearly all of the $s over here are negative. No other utility + # has as many -$ which tells me this is a data entry error. + # see https://github.com/catalyst-cooperative/pudl/issues/2703 for more details + negative_util_mask = df.utility_id_ferc1 == 211 + df.loc[negative_util_mask, "ending_balance"] = abs( + df.loc[negative_util_mask, "ending_balance"] + ) return df @@ -5610,7 +5703,24 @@ def convert_xbrl_metadata_json_to_df( for dbf_only_fact in ["load_dispatching_transmission_expense"] ] dbf_only_facts = pd.DataFrame(dbf_only_facts).convert_dtypes() - return pd.concat([tbl_meta, dbf_only_facts]) + return pd.concat([tbl_meta, dbf_only_facts]).assign(utility_type="electric") + + @cache_df("process_xbrl_metadata") + def process_xbrl_metadata( + self: Self, + xbrl_metadata_converted: pd.DataFrame, + xbrl_calculations: pd.DataFrame, + ) -> pd.DataFrame: + """Transform the metadata to reflect the transformed data. + + Beyond the standard :meth:`Ferc1AbstractTableTransformer.process_xbrl_metadata` + processing, add utility type. 
+ """ + return ( + super() + .process_xbrl_metadata(xbrl_metadata_converted, xbrl_calculations) + .assign(utility_type="electric") + ) @cache_df(key="dbf") def process_dbf(self, raw_dbf: pd.DataFrame) -> pd.DataFrame: @@ -5677,6 +5787,23 @@ def deduplicate_xbrl_factoid_xbrl_metadata( ) return tbl_meta_cleaned + @cache_df("process_xbrl_metadata") + def process_xbrl_metadata( + self: Self, + xbrl_metadata_converted: pd.DataFrame, + xbrl_calculations: pd.DataFrame, + ) -> pd.DataFrame: + """Transform the metadata to reflect the transformed data. + + Beyond the standard :meth:`Ferc1AbstractTableTransformer.process_xbrl_metadata` + processing, add utility type. + """ + return ( + super() + .process_xbrl_metadata(xbrl_metadata_converted, xbrl_calculations) + .assign(utility_type="electric") + ) + @cache_df("main") def transform_main(self, df): """Add duplicate removal after standard transform_main & assign utility type.""" @@ -6053,7 +6180,7 @@ def other_dimensions(table_names: list[str]) -> list[str]: def table_to_xbrl_factoid_name() -> dict[str, str]: - """Build a dictionary of table name (keys) to ``xbrl_factiod`` column name.""" + """Build a dictionary of table name (keys) to ``xbrl_factoid`` column name.""" return { table_name: transformer().params.xbrl_factoid_name for (table_name, transformer) in FERC1_TFR_CLASSES.items() @@ -6159,7 +6286,7 @@ def metadata_xbrl_ferc1(**kwargs) -> pd.DataFrame: io_manager_key=None, # Change to sqlite_io_manager... ) def calculation_components_xbrl_ferc1(**kwargs) -> pd.DataFrame: - """Create calculation-compnent table from table-level metadata.""" + """Create calculation-component table from table-level metadata.""" clean_xbrl_metadata_json = kwargs["clean_xbrl_metadata_json"] table_dimensions_ferc1 = kwargs["table_dimensions_ferc1"] metadata_xbrl_ferc1 = kwargs["metadata_xbrl_ferc1"] @@ -6193,26 +6320,71 @@ def calculation_components_xbrl_ferc1(**kwargs) -> pd.DataFrame: ) ) - # Defensive testing on this table! + child_cols = ["table_name", "xbrl_factoid"] + calc_cols = child_cols + dimensions + calc_and_parent_cols = calc_cols + [f"{col}_parent" for col in calc_cols] + # Defensive testing on this table! assert calc_components[["table_name", "xbrl_factoid"]].notnull().all(axis=1).all() - calc_cols = ["table_name", "xbrl_factoid"] + dimensions - calc_and_parent_cols = calc_cols + [f"{col}_parent" for col in calc_cols] + # Let's check that all calculated components that show up in our data are + # getting calculated. + def check_calcs_vs_table( + calcs: pd.DataFrame, + checked_table: pd.DataFrame, + idx_calcs: list[str], + idx_table: list[str], + how: Literal["in", "not_in"], + ) -> pd.DataFrame: + if how == "in": + idx = calcs.set_index(idx_calcs).index.intersection( + checked_table.set_index(idx_table).index + ) + elif how == "not_in": + idx = calcs.set_index(idx_calcs).index.difference( + checked_table.set_index(idx_table).index + ) + calcs_vs_table = calcs.set_index(idx_calcs).loc[idx] + return calcs_vs_table.reset_index() - missing_from_calcs_idx = ( - calc_components[calc_components.table_name.isin(FERC1_TFR_CLASSES.keys())] - .set_index(calc_cols) - .index.difference(metadata_xbrl_ferc1.set_index(calc_cols).index) + # which calculations are missing from the metadata table? 
+    missing_calcs = check_calcs_vs_table(
+        calcs=calc_components[
+            calc_components.table_name.isin(FERC1_TFR_CLASSES.keys())
+        ],
+        checked_table=metadata_xbrl_ferc1,
+        idx_calcs=calc_cols,
+        idx_table=calc_cols,
+        how="not_in",
     )
     # ensure that none of the calculation components that are missing from the metadata
     # table are from any of the exploded tables.
-    missing_calcs = calc_components.set_index(calc_cols).loc[missing_from_calcs_idx]
     if not missing_calcs.empty:
-        raise AssertionError(
-            # logger.warning(
-            f"Found missing calculations from the exploded tables:\n{missing_calcs=}"
+        logger.warning(
+            "Calculations found in calculation components table are missing from the "
+            "metadata_xbrl_ferc1 table."
+        )
+    # which of these missing calculations actually show up in the transformed tables?
+    # This handles dbf-only calculation components, which are added to the
+    # metadata_xbrl_ferc1 table as part of each table's transformations but aren't
+    # observed (or therefore present in table_dimensions_ferc1) in the fast ETL or
+    # in all subsets of years. We only want to flag calculation components as
+    # missing when they're actually observed in the data.
+    actually_missing_kids = check_calcs_vs_table(
+        calcs=missing_calcs,
+        checked_table=table_dimensions_ferc1,
+        idx_calcs=child_cols,
+        idx_table=child_cols,
+        how="in",
+    )
+    logger.warning(
+        f"{len(actually_missing_kids)} of {len(missing_calcs)} missing calculation components observed in transformed FERC1 data."
     )
+    if not actually_missing_kids.empty:
+        raise AssertionError(
+            f"Found missing calculations from the exploded tables:\n{actually_missing_kids=}"
+        )
+
     check_for_calc_components_duplicates(
         calc_components,
         table_names_known_dupes=["electricity_sales_by_rate_schedule_ferc1"],
diff --git a/src/pudl/workspace/datastore.py b/src/pudl/workspace/datastore.py
index 73b72a5571..e1043945a0 100644
--- a/src/pudl/workspace/datastore.py
+++ b/src/pudl/workspace/datastore.py
@@ -9,13 +9,14 @@
 from collections import defaultdict
 from collections.abc import Iterator
 from pathlib import Path
-from typing import Any, Self
+from typing import Annotated, Any, Self
 from urllib.parse import ParseResult, urlparse

 import datapackage
 import requests
 from google.auth.exceptions import DefaultCredentialsError
-from pydantic import BaseSettings, HttpUrl, constr
+from pydantic import HttpUrl, StringConstraints
+from pydantic_settings import BaseSettings, SettingsConfigDict
 from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry

@@ -27,9 +28,12 @@
 logger = pudl.logging_helpers.get_logger(__name__)
 PUDL_YML = Path.home() / ".pudl.yml"

-ZenodoDoi = constr(
-    strict=True, min_length=16, regex=r"(10\.5072|10\.5281)/zenodo.([\d]+)"
-)
+ZenodoDoi = Annotated[
+    str,
+    StringConstraints(
+        strict=True, min_length=16, pattern=r"(10\.5072|10\.5281)/zenodo.([\d]+)"
+    ),
+]


 class ChecksumMismatchError(ValueError):
@@ -166,41 +170,21 @@ def get_json_string(self) -> str:
 class ZenodoDoiSettings(BaseSettings):
     """Digital Object Identifiers pointing to currently used Zenodo archives."""

-    # Sandbox DOIs are provided for reference
     censusdp1tract: ZenodoDoi = "10.5281/zenodo.4127049"
-    # censusdp1tract: ZenodoDoi = "10.5072/zenodo.674992"
     eia860: ZenodoDoi = "10.5281/zenodo.10067566"
-    # eia860: ZenodoDoi = "10.5072/zenodo.1222854"
-    eia860m: ZenodoDoi = "10.5281/zenodo.8188017"
-    # eia860m: ZenodoDoi = "10.5072/zenodo.1225517"
-    eia861: ZenodoDoi = "10.5281/zenodo.10093091"
-    # eia861: ZenodoDoi = "10.5072/zenodo.1229930"
-    eia923: ZenodoDoi =
"10.5281/zenodo.8172818" - # eia923: ZenodoDoi = "10.5072/zenodo.1217724" + eia860m: ZenodoDoi = "10.5281/zenodo.10204686" + eia861: ZenodoDoi = "10.5281/zenodo.10204708" + eia923: ZenodoDoi = "10.5281/zenodo.10067550" eia_bulk_elec: ZenodoDoi = "10.5281/zenodo.7067367" - # eia_bulk_elec: ZenodoDoi = "10.5072/zenodo.1103572" epacamd_eia: ZenodoDoi = "10.5281/zenodo.7900974" - # epacamd_eia: ZenodoDoi = "10.5072/zenodo.1199170" epacems: ZenodoDoi = "10.5281/zenodo.8235497" - # epacems: ZenodoDoi = "10.5072/zenodo.1228519" ferc1: ZenodoDoi = "10.5281/zenodo.8326634" - # ferc1: ZenodoDoi = "10.5072/zenodo.1234455" ferc2: ZenodoDoi = "10.5281/zenodo.8326697" - # ferc2: ZenodoDoi = "10.5072/zenodo.1236695" ferc6: ZenodoDoi = "10.5281/zenodo.8326696" - # ferc6: ZenodoDoi = "10.5072/zenodo.1236703" ferc60: ZenodoDoi = "10.5281/zenodo.8326695" - # ferc60: ZenodoDoi = "10.5072/zenodo.1236694" ferc714: ZenodoDoi = "10.5281/zenodo.8326694" - # ferc714: ZenodoDoi = "10.5072/zenodo.1237565" phmsagas: ZenodoDoi = "10.5281/zenodo.8346646" - # phmsagas: ZenodoDoi = "10.5072/zenodo.1239253" - - class Config: - """Pydantic config, reads from .env file.""" - - env_prefix = "pudl_zenodo_doi_" - env_file = ".env" + model_config = SettingsConfigDict(env_prefix="pudl_zenodo_doi_", env_file=".env") class ZenodoFetcher: @@ -307,12 +291,12 @@ def __init__( Args: local_cache_path: if provided, LocalFileCache pointed at the data - subdirectory of this path will be used with this Datastore. + subdirectory of this path will be used with this Datastore. gcs_cache_path: if provided, GoogleCloudStorageCache will be used - to retrieve data files. The path is expected to have the following - format: gs://bucket[/path_prefix] + to retrieve data files. The path is expected to have the following + format: gs://bucket[/path_prefix] timeout: connection timeouts (in seconds) to use when connecting - to Zenodo servers. + to Zenodo servers. 
""" self._cache = resource_cache.LayeredCache() self._datapackage_descriptors: dict[str, DatapackageDescriptor] = {} diff --git a/src/pudl/workspace/setup.py b/src/pudl/workspace/setup.py index 6afaa751ea..4337eb69e0 100644 --- a/src/pudl/workspace/setup.py +++ b/src/pudl/workspace/setup.py @@ -4,36 +4,15 @@ import pathlib import shutil from pathlib import Path -from typing import Any -from pydantic import BaseSettings, DirectoryPath -from pydantic.validators import path_validator +from pydantic import DirectoryPath, NewPath +from pydantic_settings import BaseSettings, SettingsConfigDict import pudl.logging_helpers logger = pudl.logging_helpers.get_logger(__name__) - -class MissingPath(Path): - """Validates potential path that doesn't exist.""" - - @classmethod - def __get_validators__(cls) -> Any: - """Validates that path doesn't exist and is path-like.""" - yield path_validator - yield cls.validate - - @classmethod - def validate(cls, value: Path) -> Path: - """Validates that path doesn't exist.""" - if value.exists(): - raise ValueError("path exists") - - return value - - -# TODO: The following could be replaced with NewPath from pydantic v2 -PotentialDirectoryPath = DirectoryPath | MissingPath +PotentialDirectoryPath = DirectoryPath | NewPath class PudlPaths(BaseSettings): @@ -45,11 +24,7 @@ class PudlPaths(BaseSettings): pudl_input: PotentialDirectoryPath pudl_output: PotentialDirectoryPath - - class Config: - """Pydantic config, reads from .env file.""" - - env_file = ".env" + model_config = SettingsConfigDict(env_file=".env", extra="ignore") @property def input_dir(self) -> Path: diff --git a/src/pudl/workspace/setup_cli.py b/src/pudl/workspace/setup_cli.py index 6dea0a7dbd..66001bde29 100644 --- a/src/pudl/workspace/setup_cli.py +++ b/src/pudl/workspace/setup_cli.py @@ -31,10 +31,11 @@ └── tmp PUDL_OUTPUT - ├── ferc1.sqlite + ├── ferc1_dbf.sqlite + ├── ferc1_xbrl.sqlite ... ├── pudl.sqlite - └── hourly_emissions_cems + └── hourly_emissions_cems.parquet Initially, the directories in the data store will be empty. 
The pudl_datastore or pudl_etl commands will download data from public sources and organize it for
diff --git a/test/conftest.py b/test/conftest.py
index 14aef3e05b..a09dc516f0 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -5,10 +5,10 @@
 import logging
 import os
 from pathlib import Path
+from typing import Any

 import pytest
 import sqlalchemy as sa
-import yaml
 from dagster import build_init_resource_context, materialize_to_memory

 import pudl
@@ -17,13 +17,15 @@
 from pudl.extract.ferc1 import raw_xbrl_metadata_json
 from pudl.ferc_to_sqlite.cli import ferc_to_sqlite_job_factory
 from pudl.io_managers import (
+    PudlSQLiteIOManager,
     ferc1_dbf_sqlite_io_manager,
     ferc1_xbrl_sqlite_io_manager,
     pudl_sqlite_io_manager,
 )
 from pudl.metadata.classes import Package
 from pudl.output.pudltabl import PudlTabl
-from pudl.settings import DatasetsSettings, EtlSettings
+from pudl.settings import DatasetsSettings, EtlSettings, FercToSqliteSettings
+from pudl.workspace.datastore import Datastore
 from pudl.workspace.setup import PudlPaths

 logger = logging.getLogger(__name__)
@@ -86,19 +88,19 @@ def test_directory():


 @pytest.fixture(scope="session", name="live_dbs")
-def live_databases(request):
-    """Fixture that tells whether to use existing live FERC1/PUDL DBs)."""
+def live_databases(request) -> bool:
+    """Fixture that tells whether to use existing live FERC1/PUDL DBs."""
     return request.config.getoption("--live-dbs")


 @pytest.fixture(scope="session", name="save_unmapped_ids")
-def save_unmapped_ids(request):
-    """Fixture that tells whether to use existing live FERC1/PUDL DBs)."""
+def save_unmapped_ids(request) -> bool:
+    """Fixture that tells whether to save unmapped IDs."""
     return request.config.getoption("--save-unmapped-ids")


 @pytest.fixture(scope="session", name="check_foreign_keys")
-def check_foreign_keys(request):
-    """Fixture that tells whether to use existing live FERC1/PUDL DBs)."""
+def check_foreign_keys(request) -> bool:
+    """Fixture that tells whether to check foreign key constraints."""
     return not request.config.getoption("--ignore-foreign-key-constraints")


@@ -112,14 +114,12 @@ def etl_parameters(request, test_dir) -> EtlSettings:
     etl_settings_yml = Path(
         test_dir.parent / "src/pudl/package_data/settings/etl_fast.yml"
     )
-    with Path.open(etl_settings_yml, encoding="utf8") as settings_file:
-        etl_settings_out = yaml.safe_load(settings_file)
-    etl_settings = EtlSettings().parse_obj(etl_settings_out)
+    etl_settings = EtlSettings.from_yaml(etl_settings_yml)
     return etl_settings


 @pytest.fixture(scope="session", name="ferc_to_sqlite_settings")
-def ferc_to_sqlite_parameters(etl_settings):
+def ferc_to_sqlite_parameters(etl_settings: EtlSettings) -> FercToSqliteSettings:
     """Read ferc_to_sqlite parameters out of test settings dictionary."""
     return etl_settings.ferc_to_sqlite_settings


@@ -131,7 +131,7 @@ def pudl_etl_parameters(etl_settings: EtlSettings) -> DatasetsSettings:


 @pytest.fixture(scope="session", params=["AS"], ids=["ferc1_annual"])
-def pudl_out_ferc1(live_dbs, pudl_engine, request):
+def pudl_out_ferc1(live_dbs: bool, pudl_engine: sa.Engine, request) -> PudlTabl:
     """Define parameterized PudlTabl output object fixture for FERC 1 tests."""
     if not live_dbs:
         pytest.skip("Validation tests only work with a live PUDL DB.")
@@ -143,7 +143,7 @@
     params=[None, "AS", "MS"],
     ids=["eia_raw", "eia_annual", "eia_monthly"],
 )
-def pudl_out_eia(live_dbs, pudl_engine, request):
+def pudl_out_eia(live_dbs: bool, pudl_engine: sa.Engine, request) -> PudlTabl:
     """Define parameterized PudlTabl output object fixture for EIA tests."""
     if not live_dbs:
         pytest.skip("Validation tests only work with a live PUDL DB.")
@@ -157,9 +157,12 @@ def
@@ -131,7 +131,7 @@ def pudl_etl_parameters(etl_settings: EtlSettings) -> DatasetsSettings:

 @pytest.fixture(scope="session", params=["AS"], ids=["ferc1_annual"])
-def pudl_out_ferc1(live_dbs, pudl_engine, request):
+def pudl_out_ferc1(live_dbs: bool, pudl_engine: sa.Engine, request) -> PudlTabl:
     """Define parameterized PudlTabl output object fixture for FERC 1 tests."""
     if not live_dbs:
         pytest.skip("Validation tests only work with a live PUDL DB.")
@@ -143,7 +143,7 @@
     params=[None, "AS", "MS"],
     ids=["eia_raw", "eia_annual", "eia_monthly"],
 )
-def pudl_out_eia(live_dbs, pudl_engine, request):
+def pudl_out_eia(live_dbs: bool, pudl_engine: sa.Engine, request) -> PudlTabl:
     """Define parameterized PudlTabl output object fixture for EIA tests."""
     if not live_dbs:
         pytest.skip("Validation tests only work with a live PUDL DB.")
@@ -157,9 +157,12 @@ def pudl_out_eia(live_dbs, pudl_engine, request):

 @pytest.fixture(scope="session", name="fast_out_annual")
-def fast_out_annual(pudl_engine, pudl_datastore_fixture):
+def fast_out_annual(
+    pudl_engine: sa.Engine,
+    pudl_datastore_fixture: Datastore,
+) -> PudlTabl:
     """A PUDL output object for use in CI."""
-    return pudl.output.pudltabl.PudlTabl(
+    return PudlTabl(
         pudl_engine,
         freq="AS",
         fill_fuel_cost=True,
@@ -169,7 +172,7 @@ def fast_out_annual(pudl_engine, pudl_datastore_fixture):

 @pytest.fixture(scope="session")
-def pudl_out_orig(live_dbs, pudl_engine):
+def pudl_out_orig(live_dbs: bool, pudl_engine: sa.Engine) -> PudlTabl:
     """Create an unaggregated PUDL output object for checking raw data."""
     if not live_dbs:
         pytest.skip("Validation tests only work with a live PUDL DB.")
@@ -177,7 +180,9 @@

 @pytest.fixture(scope="session")
-def ferc_to_sqlite_dbf_only(live_dbs, pudl_datastore_config, etl_settings):
+def ferc_to_sqlite_dbf_only(
+    live_dbs: bool, pudl_datastore_config, etl_settings: EtlSettings
+):
     """Create raw FERC 1 SQLite DBs, but only based on DBF sources."""
     if not live_dbs:
         ferc_to_sqlite_job_factory(
@@ -186,7 +191,7 @@ def ferc_to_sqlite_dbf_only(live_dbs, pudl_datastore_config, etl_settings):
             run_config={
                 "resources": {
                     "ferc_to_sqlite_settings": {
-                        "config": etl_settings.ferc_to_sqlite_settings.dict()
+                        "config": etl_settings.ferc_to_sqlite_settings.model_dump()
                     },
                     "datastore": {
                         "config": pudl_datastore_config,
@@ -197,7 +202,9 @@

 @pytest.fixture(scope="session")
-def ferc_to_sqlite_xbrl_only(live_dbs, pudl_datastore_config, etl_settings):
+def ferc_to_sqlite_xbrl_only(
+    live_dbs: bool, pudl_datastore_config, etl_settings: EtlSettings
+):
     """Create raw FERC 1 SQLite DBs, but only based on XBRL sources."""
     if not live_dbs:
         ferc_to_sqlite_job_factory(
@@ -206,7 +213,7 @@ def ferc_to_sqlite_xbrl_only(live_dbs, pudl_datastore_config, etl_settings):
             run_config={
                 "resources": {
                     "ferc_to_sqlite_settings": {
-                        "config": etl_settings.ferc_to_sqlite_settings.dict()
+                        "config": etl_settings.ferc_to_sqlite_settings.model_dump()
                     },
                     "datastore": {
                         "config": pudl_datastore_config,
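
Editor's note: pydantic v2 renames .dict() to .model_dump(), which is why all the run_config plumbing above changes. A toy model (a hypothetical stand-in, not the real FercToSqliteSettings) showing the swap:

    from pydantic import BaseModel

    class FercToSqlite(BaseModel):  # hypothetical stand-in for the real settings
        years: list[int] = [2020, 2021]

    config = FercToSqlite().model_dump()  # {'years': [2020, 2021]}
    # .dict() still exists in v2, but only as a deprecated alias.
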
@@ -217,7 +224,7 @@

 @pytest.fixture(scope="session")
-def ferc_to_sqlite(live_dbs, pudl_datastore_config, etl_settings):
+def ferc_to_sqlite(live_dbs, pudl_datastore_config, etl_settings: EtlSettings):
     """Create raw FERC 1 SQLite DBs.

     If we are using the test database, we initialize it from scratch first. If we're
@@ -226,14 +233,14 @@ def ferc_to_sqlite(live_dbs, pudl_datastore_config, etl_settings):
     """
     if not live_dbs:
         logger.info(
-            f"ferc_to_sqlite_settings: {etl_settings.ferc_to_sqlite_settings.dict()}"
+            f"ferc_to_sqlite_settings: {etl_settings.ferc_to_sqlite_settings.model_dump()}"
         )
         logger.info(f"ferc_to_sqlite PUDL_OUTPUT: {os.getenv('PUDL_OUTPUT')}")
         ferc_to_sqlite_job_factory()().execute_in_process(
             run_config={
                 "resources": {
                     "ferc_to_sqlite_settings": {
-                        "config": etl_settings.ferc_to_sqlite_settings.dict()
+                        "config": etl_settings.ferc_to_sqlite_settings.model_dump()
                     },
                     "datastore": {
                         "config": pudl_datastore_config,
@@ -244,7 +251,7 @@

 @pytest.fixture(scope="session", name="ferc1_engine_dbf")
-def ferc1_dbf_sql_engine(ferc_to_sqlite_dbf_only):
+def ferc1_dbf_sql_engine(ferc_to_sqlite_dbf_only: FercToSqliteSettings) -> sa.Engine:
     """Grab a connection to the FERC Form 1 DB clone."""
     context = build_init_resource_context(
         resources={"dataset_settings": dataset_settings_config}
@@ -253,7 +260,9 @@ def ferc1_dbf_sql_engine(ferc_to_sqlite_dbf_only):

 @pytest.fixture(scope="session", name="ferc1_engine_xbrl")
-def ferc1_xbrl_sql_engine(ferc_to_sqlite_xbrl_only, dataset_settings_config):
+def ferc1_xbrl_sql_engine(
+    ferc_to_sqlite_xbrl_only: FercToSqliteSettings, dataset_settings_config
+) -> sa.Engine:
     """Grab a connection to the FERC Form 1 DB clone."""
     context = build_init_resource_context(
         resources={"dataset_settings": dataset_settings_config}
@@ -262,7 +271,7 @@ def ferc1_xbrl_sql_engine(ferc_to_sqlite_xbrl_only, dataset_settings_config):

 @pytest.fixture(scope="session", name="ferc1_xbrl_taxonomy_metadata")
-def ferc1_xbrl_taxonomy_metadata(ferc1_engine_xbrl):
+def ferc1_xbrl_taxonomy_metadata(ferc1_engine_xbrl: sa.Engine):
     """Read the FERC 1 XBRL taxonomy metadata from JSON."""
     result = materialize_to_memory([raw_xbrl_metadata_json])
     assert result.success
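
Editor's note: the taxonomy-metadata fixture above exercises its asset through Dagster's materialize_to_memory, which runs assets without writing to any persistent IO manager. A self-contained toy example (the greeting asset is hypothetical; raw_xbrl_metadata_json is the real one used in the fixture):

    from dagster import asset, materialize_to_memory

    @asset
    def greeting() -> str:
        return "hello"

    # Executes the asset graph entirely in memory and reports success/failure.
    result = materialize_to_memory([greeting])
    assert result.success
    print(result.output_for_node("greeting"))  # "hello"
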
@@ -272,14 +281,14 @@ def ferc1_xbrl_taxonomy_metadata(ferc1_engine_xbrl):

 @pytest.fixture(scope="session")
 def pudl_sql_io_manager(
-    ferc1_engine_dbf,  # Implicit dependency
-    ferc1_engine_xbrl,  # Implicit dependency
-    live_dbs,
+    ferc1_engine_dbf: sa.Engine,  # Implicit dependency
+    ferc1_engine_xbrl: sa.Engine,  # Implicit dependency
+    live_dbs: bool,
     pudl_datastore_config,
     dataset_settings_config,
-    check_foreign_keys,
+    check_foreign_keys: bool,
     request,
-):
+) -> PudlSQLiteIOManager:
     """Grab a connection to the PUDL IO manager.

     If we are using the test database, we initialize the PUDL DB from scratch. If we're
@@ -312,49 +321,60 @@ def pudl_sql_io_manager(

 @pytest.fixture(scope="session")
-def pudl_engine(pudl_sql_io_manager):
+def pudl_engine(pudl_sql_io_manager: PudlSQLiteIOManager) -> sa.Engine:
     """Get PUDL SQL engine from io manager."""
     return pudl_sql_io_manager.engine


 @pytest.fixture(scope="session", autouse=True)
 def configure_paths_for_tests(tmp_path_factory, request):
-    """Configures PudlPaths for tests."""
-    gha_override_input = False
-    gha_override_output = False
-    if os.environ.get("GITHUB_ACTIONS", False):
-        gha_override_input = "PUDL_INPUTS" not in os.environ
-        gha_override_output = "PUDL_OUTPUTS" not in os.environ
-        logger.info(
-            "Running in GitHub Actions environment, using"
-            f" temporary input dir: {gha_override_input}, and"
-            f" temporary output dir: {gha_override_output}"
-        )
+    """Configures PudlPaths for tests.
+
+    Typically PUDL_INPUT and PUDL_OUTPUT will be read from the environment.
+    If we are running in GitHub Actions and they are NOT set, we'll use temp dirs.
+    If we are NOT running in GitHub Actions (e.g. we're running locally) then we always
+    want to use a temporary output directory, so we don't overwrite a user's existing
+    databases.
+    """
+    # Just in case we need this later...
     pudl_tmpdir = tmp_path_factory.mktemp("pudl")
-    if gha_override_output or request.config.getoption("--tmp-data"):
-        in_tmp = pudl_tmpdir / "data"
+    # Are we running in GitHub Actions?
+    gha = os.environ.get("GITHUB_ACTIONS", False)
+    # Under what circumstances do we want to use a temporary input directory?
+    # This will force a re-download of raw inputs from Zenodo or the GCS cache:
+    if (gha and "PUDL_INPUT" not in os.environ) or (
+        request.config.getoption("--tmp-data")
+    ):
+        in_tmp = pudl_tmpdir / "input"
         in_tmp.mkdir()
         PudlPaths.set_path_overrides(
             input_dir=str(Path(in_tmp).resolve()),
         )
-    if gha_override_output or not request.config.getoption("--live-dbs"):
+        logger.info(f"Using temporary PUDL_INPUT: {in_tmp}")
+
+    # Use a temporary output dir if we're on GHA and PUDL_OUTPUT is unset:
+    if (gha and "PUDL_OUTPUT" not in os.environ) or (
+        # Use a temporary output dir if we're not on GHA and we're not using live DBs.
+        # This will typically be the case when running local unit/integration tests:
+        not gha and not request.config.getoption("--live-dbs")
+    ):
         out_tmp = pudl_tmpdir / "output"
         out_tmp.mkdir()
         PudlPaths.set_path_overrides(
             output_dir=str(Path(out_tmp).resolve()),
         )
-    logger.info(f"Starting unit tests with output path {PudlPaths().output_dir}")
+        logger.info(f"Using temporary PUDL_OUTPUT: {out_tmp}")

     pudl.workspace.setup.init()


 @pytest.fixture(scope="session")
-def dataset_settings_config(request, etl_settings):
+def dataset_settings_config(request, etl_settings: EtlSettings):
     """Create dagster dataset_settings resource."""
-    return etl_settings.datasets.dict()
+    return etl_settings.datasets.model_dump()


 @pytest.fixture(scope="session")  # noqa: C901
-def pudl_datastore_config(request):
+def pudl_datastore_config(request) -> dict[str, Any]:
     """Produce a :class:pudl.workspace.datastore.Datastore."""
     gcs_cache_path = request.config.getoption("--gcs-cache-path")
     return {
@@ -364,13 +384,13 @@

 @pytest.fixture(scope="session")
-def pudl_datastore_fixture(pudl_datastore_config):
+def pudl_datastore_fixture(pudl_datastore_config: dict[str, Any]) -> Datastore:
     """Create pudl Datastore resource."""
     init_context = build_init_resource_context(config=pudl_datastore_config)
     return resources.datastore(init_context)


-def skip_table_if_null_freq_table(table_name, freq):
+def skip_table_if_null_freq_table(table_name: str, freq: str | None):
     """Skip if the table only exists for annual/monthly aggregations and freq is None."""
     if table_name in AS_MS_ONLY_FREQ_TABLES and freq is None:
         pytest.skip(
diff --git a/test/integration/console_scripts_test.py b/test/integration/console_scripts_test.py
index 58e2c04501..41eedf931d 100644
--- a/test/integration/console_scripts_test.py
+++ b/test/integration/console_scripts_test.py
@@ -16,5 +16,5 @@
 @pytest.mark.script_launch_mode("inprocess")
 def test_pudl_scripts(script_runner, script_name):
     """Run each console script in --help mode for testing."""
-    ret = script_runner.run(script_name, "--help", print_result=False)
+    ret = script_runner.run([script_name, "--help"], print_result=False)
     assert ret.success
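
Editor's note: newer pytest-console-scripts releases expect the whole command as a single list rather than as separate positional arguments, hence the one-line change above. The call pattern in isolation (the script name here is illustrative):

    def test_cli_help(script_runner):
        # Pass the command and its flags as one list, not *args.
        ret = script_runner.run(["pudl_etl", "--help"], print_result=False)
        assert ret.success
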
diff --git a/test/integration/datasette_metadata_test.py b/test/integration/datasette_metadata_test.py
index e039a156b3..ce0c3bf004 100644
--- a/test/integration/datasette_metadata_test.py
+++ b/test/integration/datasette_metadata_test.py
@@ -13,7 +13,11 @@

 def test_datasette_metadata_to_yml(ferc1_engine_xbrl):
-    """Test the ability to export metadata as YML for use with Datasette."""
+    """Test the ability to export metadata as YML for use with Datasette.
+
+    Requires the ferc1_engine_xbrl because we construct Datasette metadata from the
+    datapackage.json files which annotate the XBRL derived FERC SQLite DBs.
+    """
     metadata_yml = PudlPaths().output_dir / "metadata.yml"
     logger.info(f"Writing Datasette Metadata to {metadata_yml}")

@@ -24,15 +28,20 @@ def test_datasette_metadata_to_yml(ferc1_engine_xbrl):
     logger.info("Parsing generated metadata using datasette utils.")
     metadata_json = json.dumps(yaml.safe_load(metadata_yml.open()))
     parsed_metadata = datasette.utils.parse_metadata(metadata_json)
-    assert set(parsed_metadata["databases"]) == {
-        "pudl",
-        "ferc1",
-        "ferc1_xbrl",
-        "ferc2_xbrl",
-        "ferc6_xbrl",
-        "ferc60_xbrl",
-        "ferc714_xbrl",
-    }
+    assert sorted(set(parsed_metadata["databases"])) == sorted(
+        {
+            "ferc1_dbf",
+            "ferc1_xbrl",
+            "ferc2_dbf",
+            "ferc2_xbrl",
+            "ferc60_dbf",
+            "ferc60_xbrl",
+            "ferc6_dbf",
+            "ferc6_xbrl",
+            "ferc714_xbrl",
+            "pudl",
+        }
+    )
     assert parsed_metadata["license"] == "CC-BY-4.0"
     assert (
         parsed_metadata["databases"]["pudl"]["source_url"]
@@ -45,7 +54,5 @@ def test_datasette_metadata_to_yml(ferc1_engine_xbrl):
         == "plant_name_eia"
     )
     for tbl_name in parsed_metadata["databases"]["pudl"]["tables"]:
-        assert (
-            parsed_metadata["databases"]["pudl"]["tables"][tbl_name]["columns"]
-            is not None
-        )
+        if parsed_metadata["databases"]["pudl"]["tables"][tbl_name]["columns"] is None:
+            raise AssertionError(f"pudl.{tbl_name}.columns is None")
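
Editor's note: replacing the bare assert in that loop with an explicit AssertionError means a failure names the offending table instead of just showing a falsy expression. The same pattern on toy data:

    tables = {"plants_pudl": ["plant_id"], "utilities_pudl": None}  # toy data
    for tbl_name, columns in tables.items():
        if columns is None:
            # The message pinpoints which table failed, unlike `assert columns`.
            raise AssertionError(f"pudl.{tbl_name}.columns is None")
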
diff --git a/test/integration/etl_test.py b/test/integration/etl_test.py
index 21ea53dd13..6f5c6dda72 100644
--- a/test/integration/etl_test.py
+++ b/test/integration/etl_test.py
@@ -11,43 +11,48 @@
 from dagster import build_init_resource_context

 import pudl
+from pudl.io_managers import PudlSQLiteIOManager

 logger = logging.getLogger(__name__)


-def test_pudl_engine(pudl_engine, pudl_sql_io_manager, check_foreign_keys):
+def test_pudl_engine(
+    pudl_engine: sa.Engine,
+    pudl_sql_io_manager: PudlSQLiteIOManager,
+    check_foreign_keys: bool,
+):
     """Get pudl_engine and do basic inspection.

     By default the foreign key checks are not enabled in pudl.sqlite. This test
     will check if there are any foreign key errors if check_foreign_keys is True.
     """
-    assert isinstance(pudl_engine, sa.engine.Engine)  # nosec: B101
+    assert isinstance(pudl_engine, sa.Engine)
     insp = sa.inspect(pudl_engine)
-    assert "plants_pudl" in insp.get_table_names()  # nosec: B101
-    assert "utilities_pudl" in insp.get_table_names()  # nosec: B101
+    assert "plants_pudl" in insp.get_table_names()
+    assert "utilities_pudl" in insp.get_table_names()

     if check_foreign_keys:
         # Raises ForeignKeyErrors if there are any
         pudl_sql_io_manager.check_foreign_keys()


-def test_ferc1_xbrl2sqlite(ferc1_engine_xbrl, ferc1_xbrl_taxonomy_metadata):
+def test_ferc1_xbrl2sqlite(ferc1_engine_xbrl: sa.Engine, ferc1_xbrl_taxonomy_metadata):
     """Attempt to access the XBRL based FERC 1 SQLite DB & XBRL taxonomy metadata.

     We're testing both the SQLite & JSON taxonomy here because they are generated
     together by the FERC 1 XBRL ETL.
     """
     # Does the database exist, and contain a table we expect it to contain?
-    assert isinstance(ferc1_engine_xbrl, sa.engine.Engine)  # nosec: B101
-    assert (  # nosec: B101
+    assert isinstance(ferc1_engine_xbrl, sa.Engine)
+    assert (
         "identification_001_duration"
         in sa.inspect(ferc1_engine_xbrl).get_table_names()
     )

     # Does the metadata we've read in from JSON contain a long list of entities?
-    assert isinstance(ferc1_xbrl_taxonomy_metadata, dict)  # nosec: B101
-    assert "plants_steam_ferc1" in ferc1_xbrl_taxonomy_metadata  # nosec: B101
-    assert len(ferc1_xbrl_taxonomy_metadata) > 10  # nosec: B101
-    assert len(ferc1_xbrl_taxonomy_metadata) < 100  # nosec: B101
+    assert isinstance(ferc1_xbrl_taxonomy_metadata, dict)
+    assert "plants_steam_ferc1" in ferc1_xbrl_taxonomy_metadata
+    assert len(ferc1_xbrl_taxonomy_metadata) > 10
+    assert len(ferc1_xbrl_taxonomy_metadata) < 100

     # Can we normalize that list of entities and find data in it that we expect?
     df = pd.json_normalize(
diff --git a/test/integration/ferc_dbf_extract_test.py b/test/integration/ferc_dbf_extract_test.py
index 72745e9712..38b09008c6 100644
--- a/test/integration/ferc_dbf_extract_test.py
+++ b/test/integration/ferc_dbf_extract_test.py
@@ -16,10 +16,8 @@

 def test_ferc1_dbf2sqlite(ferc1_engine_dbf):
     """Attempt to access the DBF based FERC 1 SQLite DB fixture."""
-    assert isinstance(ferc1_engine_dbf, sa.engine.Engine)  # nosec: B101
-    assert (  # nosec: B101
-        "f1_respondent_id" in sa.inspect(ferc1_engine_dbf).get_table_names()
-    )
+    assert isinstance(ferc1_engine_dbf, sa.Engine)
+    assert "f1_respondent_id" in sa.inspect(ferc1_engine_dbf).get_table_names()


 @pytest.mark.parametrize(
diff --git a/test/integration/glue_test.py b/test/integration/glue_test.py
index 2d8a7397e8..6026adaf05 100644
--- a/test/integration/glue_test.py
+++ b/test/integration/glue_test.py
@@ -86,7 +86,7 @@ def glue_test_dfs(
     )
     # Make everything lowercase
     glue_test_dfs = {
-        df_name: df.applymap(lambda x: x.lower() if isinstance(x, str) else x)
+        df_name: df.map(lambda x: x.lower() if isinstance(x, str) else x)
         for (df_name, df) in glue_test_dfs.items()
     }
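
Editor's note: pandas 2.1 deprecated DataFrame.applymap in favor of the element-wise DataFrame.map used in glue_test above. The lowercasing idiom on its own:

    import pandas as pd

    df = pd.DataFrame({"name": ["Comanche", "BARRY"], "id": [1, 2]})
    # Applies the function to every element; non-strings pass through untouched.
    lowered = df.map(lambda x: x.lower() if isinstance(x, str) else x)
    print(lowered)
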
diff --git a/test/unit/io_managers_test.py b/test/unit/io_managers_test.py
index 2a3371632e..eb407246c9 100644
--- a/test/unit/io_managers_test.py
+++ b/test/unit/io_managers_test.py
@@ -256,7 +256,7 @@ def test_migrations_match_metadata(tmp_path, monkeypatch):
     # alembic wants current directory to be the one with `alembic.ini` in it
     monkeypatch.chdir(Path(__file__).parent.parent.parent)
     # alembic knows to use PudlPaths().pudl_db - so we need to set PUDL_OUTPUT env var
-    monkeypatch.setenv("PUDL_OUTPUT", tmp_path)
+    monkeypatch.setenv("PUDL_OUTPUT", str(tmp_path))

     # run all the migrations on a fresh DB at tmp_path/pudl.sqlite
     alembic.config.main(["upgrade", "head"])
diff --git a/test/unit/settings_test.py b/test/unit/settings_test.py
index 3a966ea93e..d2495be82d 100644
--- a/test/unit/settings_test.py
+++ b/test/unit/settings_test.py
@@ -1,5 +1,7 @@
 """Tests for settings validation."""

+from typing import Self
+
 import pytest
 from dagster import DagsterInvalidConfigError, Field, build_init_resource_context
 from pandas import json_normalize
@@ -25,7 +27,7 @@
 class TestGenericDatasetSettings:
     """Test generic dataset behavior."""

-    def test_missing_field_error(self):
+    def test_missing_field_error(self: Self):
         """Test GenericDatasetSettings throws error if user forgets to add a field.

         In this case, the required ``data_source`` parameter is missing.
@@ -45,7 +47,7 @@ class Test(GenericDatasetSettings):
 class TestFerc1DbfToSqliteSettings:
     """Test Ferc1DbfToSqliteSettings."""

-    def test_ref_year(self):
+    def test_ref_year(self: Self):
         """Test reference year is within working years."""
         with pytest.raises(ValidationError):
             Ferc1DbfToSqliteSettings(ferc1_to_sqlite_refyear=1990)
@@ -57,19 +59,22 @@ class TestFerc1Settings:

     EIA860 and EIA923 use the same validation functions.
     """

-    def test_not_working_year(self):
+    def test_not_working_year(self: Self):
         """Make sure a validation error is being thrown when given an invalid year."""
         with pytest.raises(ValidationError):
             Ferc1Settings(years=[1901])

-    def test_duplicate_sort_years(self):
+    def test_duplicate_sort_years(self: Self):
         """Test that duplicate years raise a validation error."""
-        returned_settings = Ferc1Settings(years=[2001, 2001, 2000])
-        expected_years = [2000, 2001]
+        with pytest.raises(ValidationError):
+            _ = Ferc1Settings(years=[2001, 2001, 2000])

-        assert expected_years == returned_settings.years
+    def test_none_years_raise(self: Self):
+        """Test that years=None raises a validation error."""
+        with pytest.raises(ValidationError):
+            _ = Ferc1Settings(years=None)

-    def test_default_years(self):
+    def test_default_years(self: Self):
         """Test all years are used as default."""
         returned_settings = Ferc1Settings()
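
Editor's note: the duplicate-year and duplicate-state tests now expect a ValidationError, i.e. the settings models reject duplicates instead of silently sorting and deduplicating. A hypothetical pydantic v2-style validator with that behavior (not PUDL's actual implementation):

    from pydantic import BaseModel, field_validator

    class YearSettings(BaseModel):  # toy model for illustration
        years: list[int]

        @field_validator("years")
        @classmethod
        def no_duplicates(cls, years: list[int]) -> list[int]:
            if len(set(years)) != len(years):
                # A ValueError inside a validator surfaces as a ValidationError.
                raise ValueError("duplicate years are not allowed")
            return sorted(years)

    YearSettings(years=[2000, 2001])    # fine
    # YearSettings(years=[2001, 2001])  # raises ValidationError
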
""" - def test_860m(self): + def test_860m(self: Self): """Test validation error is raised when eia860m date is within 860 years.""" settings_cls = Eia860Settings original_eia80m_date = settings_cls.eia860m_date @@ -133,7 +141,7 @@ def test_860m(self): class TestEiaSettings: """Test pydantic model that validates EIA datasets.""" - def test_eia923_dependency(self): + def test_eia923_dependency(self: Self): """Test that there is some overlap between EIA860 and EIA923 data.""" eia923_settings = Eia923Settings() settings = EiaSettings(eia923=eia923_settings) @@ -147,7 +155,7 @@ def test_eia923_dependency(self): assert not set(eia860_years).isdisjoint(eia923_years_partition) assert not set(eia860_years).isdisjoint(eia923_years_settings) - def test_eia860_dependency(self): + def test_eia860_dependency(self: Self): """Test that there is some overlap between EIA860 and EIA923 data.""" eia860_settings = Eia860Settings() settings = EiaSettings(eia860=eia860_settings) @@ -165,7 +173,7 @@ def test_eia860_dependency(self): class TestDatasetsSettings: """Test pydantic model that validates all datasets.""" - def test_default_behavior(self): + def test_default_behavior(self: Self): """Make sure all of the years are added if nothing is specified.""" settings = DatasetsSettings() data_source = DataSource.from_id("ferc1") @@ -176,7 +184,7 @@ def test_default_behavior(self): assert settings.eia, "EIA settings were not added." - def test_glue(self): + def test_glue(self: Self): """Test glue settings get added when ferc and eia are requested.""" settings = DatasetsSettings() assert settings.glue, "Glue settings we not added when they should have been." @@ -184,7 +192,7 @@ def test_glue(self): assert settings.glue.eia assert settings.glue.ferc1 - def test_convert_settings_to_dagster_config(self): + def test_convert_settings_to_dagster_config(self: Self): """Test conversion of dictionary to Dagster config.""" dct = { "eia": { @@ -209,21 +217,21 @@ def test_convert_settings_to_dagster_config(self): class TestGlobalConfig: """Test global pydantic model config works.""" - def test_unknown_dataset(self): + def test_unknown_dataset(self: Self): """Test unkown dataset fed to DatasetsSettings.""" with pytest.raises(ValidationError): - DatasetsSettings().parse_obj({"unknown_data": "data"}) + DatasetsSettings().model_validate({"unknown_data": "data"}) with pytest.raises(ValidationError): - EiaSettings().parse_obj({"unknown_data": "data"}) + EiaSettings().model_validate({"unknown_data": "data"}) - def test_immutability(self): + def test_immutability(self: Self): """Test immutability config is working correctly.""" - with pytest.raises(TypeError): + with pytest.raises(ValidationError): settings = DatasetsSettings() settings.eia = EiaSettings() - with pytest.raises(TypeError): + with pytest.raises(ValidationError): settings = EiaSettings() settings.eia860 = Eia860Settings() @@ -231,7 +239,7 @@ def test_immutability(self): class TestDatasetsSettingsResource: """Test the DatasetsSettings dagster resource.""" - def test_invalid_datasource(self): + def test_invalid_datasource(self: Self): """Test an error is thrown when there is an invalid datasource in the config.""" init_context = build_init_resource_context( config={"new_datasource": {"years": [1990]}} @@ -239,13 +247,13 @@ def test_invalid_datasource(self): with pytest.raises(DagsterInvalidConfigError): _ = dataset_settings(init_context) - def test_invalid_field_type(self): + def test_invalid_field_type(self: Self): """Test an error is thrown when there is an incorrect type 
@@ -231,7 +239,7 @@ def test_immutability(self):
 class TestDatasetsSettingsResource:
     """Test the DatasetsSettings dagster resource."""

-    def test_invalid_datasource(self):
+    def test_invalid_datasource(self: Self):
         """Test an error is thrown when there is an invalid datasource in the config."""
         init_context = build_init_resource_context(
             config={"new_datasource": {"years": [1990]}}
@@ -239,13 +247,13 @@
         with pytest.raises(DagsterInvalidConfigError):
             _ = dataset_settings(init_context)

-    def test_invalid_field_type(self):
+    def test_invalid_field_type(self: Self):
         """Test an error is thrown when there is an incorrect type in the config."""
         init_context = build_init_resource_context(config={"ferc1": {"years": 2021}})
         with pytest.raises(DagsterInvalidConfigError):
             _ = dataset_settings(init_context)

-    def test_default_values(self):
+    def test_default_values(self: Self):
         """Test the correct default values are created for dagster config."""
         expected_states = EpaCemsSettings().states
         assert (
diff --git a/test/unit/transform/classes_test.py b/test/unit/transform/classes_test.py
index e53037ce42..fe2227991a 100644
--- a/test/unit/transform/classes_test.py
+++ b/test/unit/transform/classes_test.py
@@ -358,7 +358,7 @@
         "id": int,
         "year": pd.Int64Dtype(),
         "capacity_mw": float,
-        "plant_name": str,
+        "plant_name": pd.StringDtype(),
     }
 )
 MIXED_TYPE_DATA["report_date"] = pd.to_datetime(
@@ -384,7 +384,7 @@
         "id": int,
         "year": pd.Int64Dtype(),
         "capacity_mw": float,
-        "plant_name": str,
+        "plant_name": pd.StringDtype(),
     }
 )
 SPOT_FIXED_MIXED_TYPE_DATA["report_date"] = pd.to_datetime(
@@ -427,15 +427,15 @@ def fill_values(col: pd.Series, params: FillValue):
 MULTICOL_DROP_EXPECTED = pd.DataFrame(
     columns=["a_filled", "b_filled", "c_filled"],
     data=[
-        (0.0, 1.0, 2.0),
-        (42.0, 17.0, 0.0),
+        (0.0, 1, 2.0),
+        (42.0, 17, 0.0),
     ],
 )
 MULTICOL_NODROP_EXPECTED = pd.DataFrame(
     columns=["a", "b", "c", "a_filled", "b_filled", "c_filled"],
     data=[
-        (0.0, 1, 2.0, 0.0, 1.0, 2.0),
-        (np.nan, pd.NA, None, 42.0, 17.0, 0.0),
+        (0.0, 1, 2.0, 0.0, 1, 2.0),
+        (np.nan, pd.NA, None, 42.0, 17, 0.0),
     ],
 )
diff --git a/test/validate/eia_test.py b/test/validate/eia_test.py
index 2201ef4ac0..3d9743e4dd 100644
--- a/test/validate/eia_test.py
+++ b/test/validate/eia_test.py
@@ -45,15 +45,15 @@ def test_no_null_cols_eia(pudl_out_eia, live_dbs, cols, df_name):
 @pytest.mark.parametrize(
     "df_name,raw_rows,monthly_rows,annual_rows",
     [
-        ("bf_eia923", 1_559_257, 1_559_257, 127_412),
-        ("bga_eia860", 141_652, 141_652, 141_652),
-        ("boil_eia860", 83_356, 83_356, 83_356),
-        ("frc_eia923", 639_647, 261_583, 25_370),
-        ("gen_eia923", None, 5_179_377, 433_332),
-        ("gens_eia860", 556_948, 556_948, 556_948),
-        ("gf_eia923", 2_879_884, 2_879_884, 244_795),
+        ("bf_eia923", 1_569_568, 1_569_568, 128_252),
+        ("bga_eia860", 142_391, 142_391, 142_391),
+        ("boil_eia860", 83_416, 83_416, 83_416),
+        ("frc_eia923", 646_677, 264_043, 25_443),
+        ("gen_eia923", None, 5_179_478, 433_336),
+        ("gens_eia860", 556_949, 556_949, 556_949),
+        ("gf_eia923", 2_907_735, 2_907_735, 246_324),
         ("own_eia860", 89_741, 89_741, 89_741),
-        ("plants_eia860", 200_514, 200_514, 200_514),
+        ("plants_eia860", 200_511, 200_511, 200_511),
         ("pu_eia860", 199_635, 199_635, 199_635),
         ("utils_eia860", 139_883, 139_883, 139_883),
         ("emissions_control_equipment_eia860", 56_616, 56_616, 56_616),
diff --git a/test/validate/mcoe_test.py b/test/validate/mcoe_test.py
index b1efd9c982..4d3e845b73 100644
--- a/test/validate/mcoe_test.py
+++ b/test/validate/mcoe_test.py
@@ -106,11 +106,11 @@ def test_no_null_rows_mcoe(pudl_out_mcoe, live_dbs, df_name, thresh):
 @pytest.mark.parametrize(
     "df_name,monthly_rows,annual_rows",
     [
-        ("hr_by_unit", 387_670, 32_414),
-        ("hr_by_gen", 599_496, 50_070),
-        ("fuel_cost", 599_496, 50_070),
-        ("capacity_factor", 5_179_377, 433_332),
-        ("mcoe", 5_179_785, 433_366),
+        ("hr_by_unit", 389_530, 32_569),
+        ("hr_by_gen", 602_580, 50_327),
+        ("fuel_cost", 602_580, 50_327),
+        ("capacity_factor", 5_179_478, 433_336),
+        ("mcoe", 5_179_886, 433_370),
     ],
 )
 def test_minmax_rows_mcoe(pudl_out_mcoe, live_dbs, monthly_rows, annual_rows, df_name):
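
Editor's note on the classes_test change from str to pd.StringDtype() above: the nullable string dtype keeps missing values as real <NA> markers instead of coercing the column to object dtype. In isolation:

    import pandas as pd

    s = pd.Series(["coal", None], dtype=pd.StringDtype())
    print(s.dtype)   # string
    print(s.isna())  # [False, True] -- None survives as a genuine missing value
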