Skip to content

Commit

Permalink
Merge pull request #3153 from catalyst-cooperative/dev
Browse files Browse the repository at this point in the history
Merge dev into main for 2023-12-13
  • Loading branch information
zaneselvans authored Dec 13, 2023
2 parents a440c68 + 0009827 commit 25c2ce7
Show file tree
Hide file tree
Showing 34 changed files with 1,215 additions and 1,037 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/bot-auto-merge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Impersonate auto merge PR bot
uses: tibdex/github-app-token@v1
uses: tibdex/github-app-token@v2
id: generate-token
with:
app_id: ${{ secrets.BOT_AUTO_MERGE_PRS_APP_ID }}
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/build-deploy-pudl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
- name: Docker Metadata
id: docker_metadata
uses: docker/metadata-action@v4.4.0
uses: docker/metadata-action@v5.3.0
with:
images: catalystcoop/pudl-etl
flavor: |
Expand Down Expand Up @@ -83,7 +83,7 @@ jobs:

# Setup gcloud CLI
- name: Set up Cloud SDK
uses: google-github-actions/setup-gcloud@v1
uses: google-github-actions/setup-gcloud@v2

- name: Determine commit information
run: |-
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docker-build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:

- name: Docker Metadata
id: docker_metadata
uses: docker/metadata-action@v4.4.0
uses: docker/metadata-action@v5.3.0
with:
images: catalystcoop/pudl-etl
flavor: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
with:
fetch-depth: 2
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Build source and wheel distributions
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/run-etl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
uses: actions/checkout@v4
- name: Docker Metadata
id: docker_metadata
uses: docker/metadata-action@v4.4.0
uses: docker/metadata-action@v5.3.0
# TODO(rousik): we could consider YYYY-MM-DD-HHMM-branch-sha
with:
images: catalystcoop/pudl-etl-ci
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/zenodo-cache-sync.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ jobs:
service_account: "zenodo-cache-manager@catalyst-cooperative-pudl.iam.gserviceaccount.com"

- name: Set up Cloud SDK
uses: google-github-actions/setup-gcloud@v1
uses: google-github-actions/setup-gcloud@v2

- name: Update GCS cache with any new Zenodo archives
run: |
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ repos:
# Formatters: hooks that re-write Python & documentation files
####################################################################################
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.6
rev: v0.1.7
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
Expand Down
102 changes: 102 additions & 0 deletions devtools/sqlite-table-diff.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"Example of diffing tables across multiple different SQLite DBs.\n",
"\n",
"The tables must have the same name/schema. This is intended for use in\n",
"investigating validation test errors.\n",
"\"\"\"\n",
"import sqlite3\n",
"from pathlib import Path\n",
"from typing import Iterable\n",
"\n",
"import pandas as pd\n",
"\n",
"from pudl.helpers import diff_wide_tables, TableDiff\n",
"from pudl.metadata.classes import Resource\n",
"from pudl.metadata.fields import apply_pudl_dtypes\n",
"\n",
"\n",
"def table_diff(\n",
" table_name: str,\n",
" old_conn,\n",
" new_conn,\n",
" ignore_cols: Iterable[str] = (\"plant_id_ferc1\",),\n",
" addl_key_cols: Iterable[str] = (),\n",
" ) -> TableDiff:\n",
" \"\"\"Diff two versions of the same table that live in SQL databases.\n",
"\n",
" The table has to have the same name + columns in both DBs.\n",
"\n",
" Args:\n",
" table_name: the name, in the SQL database, of the table you want to compare.\n",
" old_conn: SQLite connection to the old version of the database.\n",
" new_conn: SQLite connection to the new version of the database.\n",
" ignore_cols: a list of columns that you would like to ignore diffs in.\n",
" addl_key_cols: \n",
" columns that aren't necessarily in the primary key, but that you'd\n",
" like to use as key columns for the diff - for example, if your\n",
" table only uses `record_id` as primary_key, but you want to group\n",
" the rows by `record_year` and `utility_id` as well, you would pass\n",
" those in.\n",
" \"\"\"\n",
" query = f\"SELECT * FROM {table_name}\" # noqa: S608\n",
" old_table = apply_pudl_dtypes(pd.read_sql(query, old_conn))\n",
" new_table = apply_pudl_dtypes(pd.read_sql(query, new_conn))\n",
"\n",
" cols = list(set(old_table.columns) - set(ignore_cols))\n",
"\n",
" primary_key = list(set(Resource.from_id(table_name).schema.primary_key).union(set(addl_key_cols)))\n",
" return diff_wide_tables(primary_key, old_table[cols], new_table[cols])\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"new_db = sqlite3.connect(Path(\"~/Downloads/pudl.sqlite\").expanduser())\n",
"old_db = sqlite3.connect(Path(\"~/Downloads/pudl (2).sqlite\").expanduser())\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"table_name = \"denorm_plants_steam_ferc1\"\n",
"diff = table_diff(table_name, old_db, new_db, ignore_cols=(\"plant_id_ferc1\", \"plant_id_pudl\"), addl_key_cols=(\"report_year\", \"utility_id_pudl\"))\n",
"diff.changed"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "pudl-dev",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
6 changes: 6 additions & 0 deletions docs/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ v2023.12.XX
outputs describing historical utility and balancing authority service territories. See
:issue:`1174` and :pr:`3086`.

Data Coverage
^^^^^^^^^^^^^
* Updated :doc:`data_sources/epacems` to pull the quarterly CEMS updates instead of
the annual files. Integrates CEMS data through 2023q3. See :issue:`2973` and
:pr:`3096`.

---------------------------------------------------------------------------------------
v2023.12.01
---------------------------------------------------------------------------------------
Expand Down
Loading

0 comments on commit 25c2ce7

Please sign in to comment.