From 828e72a331892a1537241736f215b7a8946c252a Mon Sep 17 00:00:00 2001 From: nmdefries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 29 Jan 2025 22:46:56 -0500 Subject: [PATCH 1/3] clarify signals affected (#1591) --- docs/api/covidcast-signals/google-symptoms.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/covidcast-signals/google-symptoms.md b/docs/api/covidcast-signals/google-symptoms.md index 684a429e8..c9f94a930 100644 --- a/docs/api/covidcast-signals/google-symptoms.md +++ b/docs/api/covidcast-signals/google-symptoms.md @@ -108,7 +108,7 @@ updates every day and provide the most up-to-date data. ## Limitations Between May 13 2024 and August 6 2024, signal values were 25%-50% lower compared to previous time periods. -This affected _all_ signals and symptom sets. +This affected _all_ `google-symptoms` signals and symptom sets. The drop does not reflect actual search term popularity during the affected period. The apparent decrease in search volume was caused by an outage in the data pipeline on the source side. The data was unfortunately not recoverable and the dip can not be repaired, but data outside the listed time period is unaffected. From 5863680e813b35bcf3646de6e3aeaea61ebd694b Mon Sep 17 00:00:00 2001 From: Nolan Gormley Date: Mon, 3 Feb 2025 14:27:32 -0500 Subject: [PATCH 2/3] Signal Documentation Coverage Endpoint (#1584) new `geo_coverage` endpoint with backing table and views, plus a utility to compute the table contents, as well as tests --------- Co-authored-by: Amaris Sim Co-authored-by: george --- .../test_coverage_crossref_update.py | 93 +++++++++++++++++++ src/acquisition/covidcast/database.py | 33 +++++++ src/ddl/v4_schema.sql | 21 +++++ src/ddl/v4_schema_aliases.sql | 1 + src/maintenance/coverage_crossref_updater.py | 33 +++++++ src/server/endpoints/covidcast.py | 18 ++++ tests/acquisition/covidcast/test_database.py | 16 ++++ 7 files changed, 215 insertions(+) create mode 100644 integrations/acquisition/covidcast/test_coverage_crossref_update.py create mode 100644 src/maintenance/coverage_crossref_updater.py diff --git a/integrations/acquisition/covidcast/test_coverage_crossref_update.py b/integrations/acquisition/covidcast/test_coverage_crossref_update.py new file mode 100644 index 000000000..f52f17141 --- /dev/null +++ b/integrations/acquisition/covidcast/test_coverage_crossref_update.py @@ -0,0 +1,93 @@ +"""Integration tests for the covidcast `geo_coverage` endpoint.""" + +# standard library +import json +import unittest + +# third party +import mysql.connector +import requests + +# first party +from delphi_utils import Nans +from delphi.epidata.client.delphi_epidata import Epidata +import delphi.operations.secrets as secrets +import delphi.epidata.acquisition.covidcast.database as live +from delphi.epidata.maintenance.coverage_crossref_updater import main +from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow + +# use the local instance of the Epidata API +BASE_URL = 'http://delphi_web_epidata/epidata' # NOSONAR + + +class CoverageCrossrefTests(CovidcastBase): + """Tests coverage crossref updater.""" + + def localSetUp(self): + """Perform per-test setup.""" + self._db._cursor.execute('TRUNCATE TABLE `coverage_crossref`') + + @staticmethod + def _make_request(params): + response = requests.get(f"{Epidata.BASE_URL}/covidcast/geo_coverage", params=params, auth=Epidata.auth) + response.raise_for_status() + return response.json() + + def test_caching(self): + """Populate, query, cache, query, and verify the cache.""" + + # insert dummy data + self._insert_rows([ + CovidcastTestRow.make_default_row(geo_type="state", geo_value="pa"), + CovidcastTestRow.make_default_row(geo_type="state", geo_value="ny"), + CovidcastTestRow.make_default_row(geo_type="state", geo_value="ny", signal="sig2"), + ]) + + results = self._make_request(params = {'geo': 'state:*'}) + + # make sure the tables are empty + self.assertEqual(results, { + 'result': -2, + 'epidata': [], + 'message': 'no results', + }) + + # update the coverage crossref table + main() + + results = self._make_request(params = {'geo': 'state:*'}) + + # make sure the data was actually served + self.assertEqual(results, { + 'result': 1, + 'epidata': [{'signal': 'sig', 'source': 'src'}, {'signal': 'sig2', 'source': 'src'}], + 'message': 'success', + }) + + results = self._make_request(params = {'geo': 'hrr:*'}) + + # make sure the tables are empty + self.assertEqual(results, { + 'result': -2, + 'epidata': [], + 'message': 'no results', + }) + + results = self._make_request(params = {'geo': 'state:pa'}) + + # make sure the data was actually served + self.assertEqual(results, { + 'result': 1, + 'epidata': [{'signal': 'sig', 'source': 'src'}], + 'message': 'success', + }) + + results = self._make_request(params = {'geo': 'state:ny'}) + + # make sure the data was actually served + self.assertEqual(results, { + 'result': 1, + 'epidata': [{'signal': 'sig', 'source': 'src'}, {'signal': 'sig2', 'source': 'src'}], + 'message': 'success', + }) + diff --git a/src/acquisition/covidcast/database.py b/src/acquisition/covidcast/database.py index 5fd56923b..80590859d 100644 --- a/src/acquisition/covidcast/database.py +++ b/src/acquisition/covidcast/database.py @@ -561,3 +561,36 @@ def retrieve_covidcast_meta_cache(self): for entry in cache: cache_hash[(entry['data_source'], entry['signal'], entry['time_type'], entry['geo_type'])] = entry return cache_hash + + def compute_coverage_crossref(self): + """Compute coverage_crossref table, for looking up available signals per geo or vice versa.""" + + logger = get_structured_logger("compute_coverage_crossref") + + coverage_crossref_delete_sql = ''' + DELETE FROM coverage_crossref; + ''' + + coverage_crossref_update_sql = ''' + INSERT INTO coverage_crossref (signal_key_id, geo_key_id, min_time_value, max_time_value) + SELECT + signal_key_id, + geo_key_id, + MIN(time_value) AS min_time_value, + MAX(time_value) AS max_time_value + FROM covid.epimetric_latest + GROUP BY signal_key_id, geo_key_id; + ''' + + self._connection.start_transaction() + + self._cursor.execute(coverage_crossref_delete_sql) + logger.info("coverage_crossref_delete", rows=self._cursor.rowcount) + + self._cursor.execute(coverage_crossref_update_sql) + logger.info("coverage_crossref_update", rows=self._cursor.rowcount) + + self.commit() + logger.info("coverage_crossref committed") + + return self._cursor.rowcount \ No newline at end of file diff --git a/src/ddl/v4_schema.sql b/src/ddl/v4_schema.sql index 7551707f6..e151bb251 100644 --- a/src/ddl/v4_schema.sql +++ b/src/ddl/v4_schema.sql @@ -164,3 +164,24 @@ CREATE TABLE `covidcast_meta_cache` ( PRIMARY KEY (`timestamp`) ) ENGINE=InnoDB; INSERT INTO covidcast_meta_cache VALUES (0, '[]'); + +CREATE TABLE `coverage_crossref` ( + `signal_key_id` bigint NOT NULL, + `geo_key_id` bigint NOT NULL, + `min_time_value` int NOT NULL, + `max_time_value` int NOT NULL, + UNIQUE INDEX coverage_crossref_geo_sig (`geo_key_id`, `signal_key_id`), + INDEX coverage_crossref_sig_geo (`signal_key_id`, `geo_key_id`) +) ENGINE=InnoDB; + +CREATE OR REPLACE VIEW `coverage_crossref_v` AS +SELECT + `sd`.`source`, + `sd`.`signal`, + `gd`.`geo_type`, + `gd`.`geo_value`, + `cc`.`min_time_value`, + `cc`.`max_time_value` +FROM `coverage_crossref` `cc` +JOIN `signal_dim` `sd` USING (`signal_key_id`) +JOIN `geo_dim` `gd` USING (`geo_key_id`); diff --git a/src/ddl/v4_schema_aliases.sql b/src/ddl/v4_schema_aliases.sql index f5c6340e9..9584fcf3a 100644 --- a/src/ddl/v4_schema_aliases.sql +++ b/src/ddl/v4_schema_aliases.sql @@ -8,3 +8,4 @@ CREATE VIEW `epidata`.`epimetric_full_v` AS SELECT * FROM `covid`.`epimetric_full_v`; CREATE VIEW `epidata`.`epimetric_latest_v` AS SELECT * FROM `covid`.`epimetric_latest_v`; CREATE VIEW `epidata`.`covidcast_meta_cache` AS SELECT * FROM `covid`.`covidcast_meta_cache`; +CREATE VIEW `epidata`.`coverage_crossref_v` AS SELECT * FROM `covid`.`coverage_crossref_v`; diff --git a/src/maintenance/coverage_crossref_updater.py b/src/maintenance/coverage_crossref_updater.py new file mode 100644 index 000000000..002f5d023 --- /dev/null +++ b/src/maintenance/coverage_crossref_updater.py @@ -0,0 +1,33 @@ +"""Updates the table for the `coverage_crossref` endpoint.""" + +import time + +from delphi.epidata.acquisition.covidcast.database import Database +from delphi_utils import get_structured_logger + + +def main(): + """Updates the table for the `coverage_crossref`.""" + + logger = get_structured_logger("coverage_crossref_updater") + start_time = time.time() + database = Database() + database.connect() + + # compute and update coverage_crossref + try: + coverage = database.compute_coverage_crossref() + finally: + # clean up in success and in failure + database.disconnect(True) + + logger.info(f"coverage_crossref returned: {coverage}") + + logger.info( + "Generated and updated covidcast geo/signal coverage", + total_runtime_in_seconds=round(time.time() - start_time, 2)) + return True + + +if __name__ == '__main__': + main() diff --git a/src/server/endpoints/covidcast.py b/src/server/endpoints/covidcast.py index 3d7d99e82..f4875fb6d 100644 --- a/src/server/endpoints/covidcast.py +++ b/src/server/endpoints/covidcast.py @@ -542,6 +542,24 @@ def transform_row(row, proxy): return execute_query(q.query, q.params, fields_string, fields_int, [], transform=transform_row) +@bp.route("/geo_coverage", methods=("GET", "POST")) +def handle_geo_coverage(): + """ + For a specific geo returns the signal coverage (number of signals for a given geo_type) + """ + + geo_sets = parse_geo_sets() + + q = QueryBuilder("coverage_crossref_v", "c") + fields_string = ["source", "signal"] + + q.set_fields(fields_string) + + q.apply_geo_filters("geo_type", "geo_value", geo_sets) + q.set_sort_order("source", "signal") + q.group_by = ["c." + field for field in fields_string] # this condenses duplicate results, similar to `SELECT DISTINCT` + + return execute_query(q.query, q.params, fields_string, [], []) @bp.route("/anomalies", methods=("GET", "POST")) def handle_anomalies(): diff --git a/tests/acquisition/covidcast/test_database.py b/tests/acquisition/covidcast/test_database.py index b676e7413..cc998f1ac 100644 --- a/tests/acquisition/covidcast/test_database.py +++ b/tests/acquisition/covidcast/test_database.py @@ -78,6 +78,22 @@ def test_update_covidcast_meta_cache_query(self): self.assertIn('timestamp', sql) self.assertIn('epidata', sql) + def test_compute_coverage_crossref_query(self): + """Query to update the compute crossref looks sensible. + + NOTE: Actual behavior is tested by integration test. + """ + + mock_connector = MagicMock() + database = Database() + database.connect(connector_impl=mock_connector) + + database.compute_coverage_crossref() + + connection = mock_connector.connect() + cursor = connection.cursor() + self.assertTrue(cursor.execute.called) + def test_insert_or_update_batch_exception_reraised(self): """Test that an exception is reraised""" mock_connector = MagicMock() From c6f4a191b766ba3df2691ee55e87e99f054eb55d Mon Sep 17 00:00:00 2001 From: melange396 Date: Tue, 4 Feb 2025 16:33:59 +0000 Subject: [PATCH 3/3] chore: release delphi-epidata 4.1.30 --- .bumpversion.cfg | 2 +- dev/local/setup.cfg | 2 +- src/client/delphi_epidata.R | 2 +- src/client/delphi_epidata.js | 2 +- src/client/packaging/npm/package.json | 2 +- src/server/_config.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index a50b8cd7b..74ada191a 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 4.1.29 +current_version = 4.1.30 commit = False tag = False diff --git a/dev/local/setup.cfg b/dev/local/setup.cfg index 66c7961d9..3de70788d 100644 --- a/dev/local/setup.cfg +++ b/dev/local/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = Delphi Development -version = 4.1.29 +version = 4.1.30 [options] packages = diff --git a/src/client/delphi_epidata.R b/src/client/delphi_epidata.R index 5f07e84fe..7c71fe9ca 100644 --- a/src/client/delphi_epidata.R +++ b/src/client/delphi_epidata.R @@ -15,7 +15,7 @@ Epidata <- (function() { # API base url BASE_URL <- getOption('epidata.url', default = 'https://api.delphi.cmu.edu/epidata/') - client_version <- '4.1.29' + client_version <- '4.1.30' auth <- getOption("epidata.auth", default = NA) diff --git a/src/client/delphi_epidata.js b/src/client/delphi_epidata.js index f38e1137d..1f07ec133 100644 --- a/src/client/delphi_epidata.js +++ b/src/client/delphi_epidata.js @@ -22,7 +22,7 @@ } })(this, function (exports, fetchImpl, jQuery) { const BASE_URL = "https://api.delphi.cmu.edu/epidata/"; - const client_version = "4.1.29"; + const client_version = "4.1.30"; // Helper function to cast values and/or ranges to strings function _listitem(value) { diff --git a/src/client/packaging/npm/package.json b/src/client/packaging/npm/package.json index 67d4c73db..3a785da7a 100644 --- a/src/client/packaging/npm/package.json +++ b/src/client/packaging/npm/package.json @@ -2,7 +2,7 @@ "name": "delphi_epidata", "description": "Delphi Epidata API Client", "authors": "Delphi Group", - "version": "4.1.29", + "version": "4.1.30", "license": "MIT", "homepage": "https://github.com/cmu-delphi/delphi-epidata", "bugs": { diff --git a/src/server/_config.py b/src/server/_config.py index 8774a792b..c542c8ff0 100644 --- a/src/server/_config.py +++ b/src/server/_config.py @@ -7,7 +7,7 @@ load_dotenv() -VERSION = "4.1.29" +VERSION = "4.1.30" MAX_RESULTS = int(10e6) MAX_COMPATIBILITY_RESULTS = int(3650)