diff --git a/integrations/acquisition/covidcast/delete_batch.csv b/integrations/acquisition/covidcast/delete_batch.csv index 5c1602218..14cf71e16 100644 --- a/integrations/acquisition/covidcast/delete_batch.csv +++ b/integrations/acquisition/covidcast/delete_batch.csv @@ -1,4 +1,4 @@ geo_id,value,stderr,sample_size,issue,time_value,geo_type,signal,source -d_nonlatest,0,0,0,1,0,county,sig,src -d_latest, 0,0,0,3,0,county,sig,src -d_justone, 0,0,0,1,0,county,sig,src \ No newline at end of file +d_nonlatest,0,0,0,1,0,msa,sig,src +d_latest, 0,0,0,3,0,msa,sig,src +d_justone, 0,0,0,1,0,msa,sig,src \ No newline at end of file diff --git a/integrations/client/test_delphi_epidata.py b/integrations/client/test_delphi_epidata.py index 82c1452ec..0c8c3e35d 100644 --- a/integrations/client/test_delphi_epidata.py +++ b/integrations/client/test_delphi_epidata.py @@ -12,11 +12,10 @@ # third party import delphi.operations.secrets as secrets from delphi.epidata.acquisition.covidcast.covidcast_meta_cache_updater import main as update_covidcast_meta_cache -from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow +from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow, FIPS, MSA from delphi.epidata.client.delphi_epidata import Epidata from delphi_utils import Nans - # py3tester coverage target __test_target__ = 'delphi.epidata.client.delphi_epidata' # all the Nans we use here are just one value, so this is a shortcut to it: @@ -219,10 +218,10 @@ def test_geo_value(self): # insert placeholder data: three counties, three MSAs N = 3 rows = [ - CovidcastTestRow.make_default_row(geo_type="county", geo_value=str(i)*5, value=i) + CovidcastTestRow.make_default_row(geo_type="fips", geo_value=FIPS[i], value=i) for i in range(N) ] + [ - CovidcastTestRow.make_default_row(geo_type="msa", geo_value=str(i)*5, value=i*10) + CovidcastTestRow.make_default_row(geo_type="msa", geo_value=MSA[i], value=i*10) for i in range(N) ] self._insert_rows(rows) @@ -241,26 +240,28 @@ def fetch(geo): self.assertEqual(request['message'], 'success') self.assertEqual(request['epidata'], counties) # test fetch a specific region - request = fetch('11111') + request = fetch([FIPS[0]]) self.assertEqual(request['message'], 'success') - self.assertEqual(request['epidata'], [counties[1]]) + self.assertEqual(request['epidata'], [counties[0]]) # test fetch a specific yet not existing region request = fetch('55555') - self.assertEqual(request['message'], 'no results') + self.assertEqual(request['message'], 'Invalid geo_value(s) 55555 for the requested geo_type fips') # test fetch a multiple regions - request = fetch(['11111', '22222']) + request = fetch([FIPS[0], FIPS[1]]) self.assertEqual(request['message'], 'success') - self.assertEqual(request['epidata'], [counties[1], counties[2]]) + self.assertEqual(request['epidata'], [counties[0], counties[1]]) # test fetch a multiple regions in another variant - request = fetch(['00000', '22222']) + request = fetch([FIPS[0], FIPS[2]]) self.assertEqual(request['message'], 'success') self.assertEqual(request['epidata'], [counties[0], counties[2]]) # test fetch a multiple regions but one is not existing - request = fetch(['11111', '55555']) - self.assertEqual(request['message'], 'success') - self.assertEqual(request['epidata'], [counties[1]]) + request = fetch([FIPS[0], '55555']) + self.assertEqual(request['message'], 'Invalid geo_value(s) 55555 for the requested geo_type fips') # test fetch a multiple regions but specify no region request = fetch([]) + self.assertEqual(request['message'], 'geo_value is empty for the requested geo_type fips!') + # test fetch a region with no results + request = fetch([FIPS[3]]) self.assertEqual(request['message'], 'no results') def test_covidcast_meta(self): @@ -325,10 +326,10 @@ def test_async_epidata(self): # insert placeholder data: three counties, three MSAs N = 3 rows = [ - CovidcastTestRow.make_default_row(geo_type="county", geo_value=str(i)*5, value=i) + CovidcastTestRow.make_default_row(geo_type="fips", geo_value=FIPS[i-1], value=i) for i in range(N) ] + [ - CovidcastTestRow.make_default_row(geo_type="msa", geo_value=str(i)*5, value=i*10) + CovidcastTestRow.make_default_row(geo_type="msa", geo_value=MSA[i-1], value=i*10) for i in range(N) ] self._insert_rows(rows) diff --git a/integrations/server/test_covidcast.py b/integrations/server/test_covidcast.py index 5a8df96f0..01d81bf29 100644 --- a/integrations/server/test_covidcast.py +++ b/integrations/server/test_covidcast.py @@ -10,7 +10,7 @@ # first party from delphi_utils import Nans -from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow +from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow, FIPS, MSA from delphi.epidata.client.delphi_epidata import Epidata # use the local instance of the Epidata API @@ -37,11 +37,10 @@ def _insert_placeholder_set_one(self): def _insert_placeholder_set_two(self): rows = [ - CovidcastTestRow.make_default_row(geo_type='county', geo_value=str(i)*5, value=i*1., stderr=i*10., sample_size=i*100.) + CovidcastTestRow.make_default_row(geo_type='msa', geo_value=MSA[i-1], value=i*1., stderr=i*10., sample_size=i*100.) for i in [1, 2, 3] ] + [ - # geo value intended to overlap with counties above - CovidcastTestRow.make_default_row(geo_type='msa', geo_value=str(i-3)*5, value=i*1., stderr=i*10., sample_size=i*100.) + CovidcastTestRow.make_default_row(geo_type='fips', geo_value=FIPS[i-4], value=i*1., stderr=i*10., sample_size=i*100.) for i in [4, 5, 6] ] self._insert_rows(rows) @@ -49,11 +48,11 @@ def _insert_placeholder_set_two(self): def _insert_placeholder_set_three(self): rows = [ - CovidcastTestRow.make_default_row(geo_type='county', geo_value='11111', time_value=2000_01_01+i, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03, lag=2-i) + CovidcastTestRow.make_default_row(time_value=2000_01_01+i, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03, lag=2-i) for i in [1, 2, 3] ] + [ - # time value intended to overlap with 11111 above, with disjoint geo values - CovidcastTestRow.make_default_row(geo_type='county', geo_value=str(i)*5, time_value=2000_01_01+i-3, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03, lag=5-i) + # time value intended to overlap with the time values above, with disjoint geo values + CovidcastTestRow.make_default_row(geo_value=MSA[i-3], time_value=2000_01_01+i-3, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03, lag=5-i) for i in [4, 5, 6] ] self._insert_rows(rows) @@ -295,7 +294,7 @@ def test_signal_wildcard(self): }) def test_geo_value(self): - """test different variants of geo types: single, *, multi.""" + """test whether geo values are valid for specific geo types""" # insert placeholder data rows = self._insert_placeholder_set_two() @@ -308,26 +307,28 @@ def fetch(geo_value): return response # test fetch a specific region - r = fetch('11111') + r = fetch(MSA[0]) self.assertEqual(r['message'], 'success') self.assertEqual(r['epidata'], expected[0:1]) # test fetch a specific yet not existing region - r = fetch('55555') - self.assertEqual(r['message'], 'no results') + r = fetch('11111') + self.assertEqual(r['message'], 'Invalid geo_value(s) 11111 for the requested geo_type msa') # test fetch multiple regions - r = fetch('11111,22222') + r = fetch(f'{MSA[0]},{MSA[1]}') self.assertEqual(r['message'], 'success') self.assertEqual(r['epidata'], expected[0:2]) # test fetch multiple noncontiguous regions - r = fetch('11111,33333') + r = fetch(f'{MSA[0]},{MSA[2]}') self.assertEqual(r['message'], 'success') self.assertEqual(r['epidata'], [expected[0], expected[2]]) # test fetch multiple regions but one is not existing - r = fetch('11111,55555') - self.assertEqual(r['message'], 'success') - self.assertEqual(r['epidata'], expected[0:1]) + r = fetch(f'{MSA[0]},11111') + self.assertEqual(r['message'], 'Invalid geo_value(s) 11111 for the requested geo_type msa') # test fetch empty region r = fetch('') + self.assertEqual(r['message'], 'geo_value is empty for the requested geo_type msa!') + # test a region that has no results + r = fetch(MSA[3]) self.assertEqual(r['message'], 'no results') def test_location_timeline(self): diff --git a/requirements.api.txt b/requirements.api.txt index 6ccafc1e1..6615f34f4 100644 --- a/requirements.api.txt +++ b/requirements.api.txt @@ -1,3 +1,4 @@ +delphi_utils==0.3.6 epiweeks==2.1.2 Flask==2.2.2 itsdangerous<2.1 diff --git a/src/acquisition/covidcast/test_utils.py b/src/acquisition/covidcast/test_utils.py index 5e6fc7cfc..6e77aba22 100644 --- a/src/acquisition/covidcast/test_utils.py +++ b/src/acquisition/covidcast/test_utils.py @@ -14,6 +14,11 @@ # all the Nans we use here are just one value, so this is a shortcut to it: nmv = Nans.NOT_MISSING.value +# TODO replace these real geo_values with fake values, and use patch and mock to mock the return values of +# delphi_utils.geomap.GeoMapper().get_geo_values(geo_type) in parse_geo_sets() of _params.py + +FIPS = ['04019', '19143', '29063', '36083'] # Example list of valid FIPS codes as strings +MSA = ['40660', '44180', '48620', '49420'] # Example list of valid MSA codes as strings class CovidcastTestRow(CovidcastRow): @staticmethod @@ -22,9 +27,9 @@ def make_default_row(**kwargs) -> "CovidcastTestRow": "source": "src", "signal": "sig", "time_type": "day", - "geo_type": "county", + "geo_type": "msa", "time_value": 2020_02_02, - "geo_value": "01234", + "geo_value": MSA[0], "value": 10.0, "stderr": 10.0, "sample_size": 10.0, diff --git a/src/server/_params.py b/src/server/_params.py index d0b1cda6d..41f5ce494 100644 --- a/src/server/_params.py +++ b/src/server/_params.py @@ -2,6 +2,7 @@ import re from dataclasses import dataclass from typing import List, Optional, Sequence, Tuple, Union +import delphi_utils from flask import request @@ -53,6 +54,17 @@ class GeoSet: geo_type: str geo_values: Union[bool, Sequence[str]] + def __init__(self, geo_type: str, geo_values: Union[bool, Sequence[str]]): + if not isinstance(geo_values, bool): + if geo_values == ['']: + raise ValidationFailedException(f"geo_value is empty for the requested geo_type {geo_type}!") + allowed_values = delphi_utils.geomap.GeoMapper().get_geo_values(geo_type) + invalid_values = set(geo_values) - set(allowed_values) + if invalid_values: + raise ValidationFailedException(f"Invalid geo_value(s) {', '.join(invalid_values)} for the requested geo_type {geo_type}") + self.geo_type = geo_type + self.geo_values = geo_values + def matches(self, geo_type: str, geo_value: str) -> bool: return self.geo_type == geo_type and (self.geo_values is True or (not isinstance(self.geo_values, bool) and geo_value in self.geo_values)) @@ -460,6 +472,7 @@ def parse_source_signal_sets() -> List[SourceSignalSet]: def parse_geo_sets() -> List[GeoSet]: geo_type = request.values.get("geo_type") + if geo_type: # old version require_any(request, "geo_value", "geo_values", empty=True) diff --git a/tests/common/test_covidcast_row.py b/tests/common/test_covidcast_row.py index f6ce2ec2d..834a7852d 100644 --- a/tests/common/test_covidcast_row.py +++ b/tests/common/test_covidcast_row.py @@ -12,6 +12,7 @@ covidcast_rows_from_args, transpose_dict, ) +from delphi.epidata.acquisition.covidcast.test_utils import MSA # py3tester coverage target (equivalent to `import *`) __test_target__ = 'delphi.epidata.common.covidcast_row' @@ -22,9 +23,9 @@ class TestCovidcastRows(unittest.TestCase): "source": ["src"] * 10, "signal": ["sig_base"] * 5 + ["sig_other"] * 5, "time_type": ["day"] * 10, - "geo_type": ["county"] * 10, + "geo_type": ["msa"] * 10, "time_value": [2021_05_01 + i for i in range(5)] * 2, - "geo_value": ["01234"] * 10, + "geo_value": [MSA[0]] * 10, "value": range(10), "stderr": [10.0] * 10, "sample_size": [10.0] * 10, diff --git a/tests/server/test_params.py b/tests/server/test_params.py index 177ff5cba..1a401efe2 100644 --- a/tests/server/test_params.py +++ b/tests/server/test_params.py @@ -28,6 +28,7 @@ from delphi.epidata.server._exceptions import ( ValidationFailedException, ) +from delphi.epidata.acquisition.covidcast.test_utils import FIPS, MSA # py3tester coverage target __test_target__ = "delphi.epidata.server._params" @@ -45,19 +46,19 @@ def setUp(self): def test_geo_set(self): with self.subTest("*"): - p = GeoSet("hrr", True) - self.assertTrue(p.matches("hrr", "any")) + p = GeoSet("fips", True) + self.assertTrue(p.matches("fips", "any")) self.assertFalse(p.matches("msa", "any")) with self.subTest("subset"): - p = GeoSet("hrr", ["a", "b"]) - self.assertTrue(p.matches("hrr", "a")) - self.assertTrue(p.matches("hrr", "b")) - self.assertFalse(p.matches("hrr", "c")) + p = GeoSet("fips", [FIPS[0], FIPS[1]]) + self.assertTrue(p.matches("fips", FIPS[0])) + self.assertTrue(p.matches("fips", FIPS[1])) + self.assertFalse(p.matches("fips", "c")) self.assertFalse(p.matches("msa", "any")) with self.subTest("count"): self.assertEqual(GeoSet("a", True).count(), inf) self.assertEqual(GeoSet("a", False).count(), 0) - self.assertEqual(GeoSet("a", ["a", "b"]).count(), 2) + self.assertEqual(GeoSet("fips", [FIPS[0], FIPS[1]]).count(), 2) def test_source_signal_set(self): with self.subTest("*"): @@ -89,43 +90,43 @@ def test_parse_geo_arg(self): with app.test_request_context("/"): self.assertEqual(parse_geo_arg(), []) with self.subTest("single"): - with app.test_request_context("/?geo=state:*"): - self.assertEqual(parse_geo_arg(), [GeoSet("state", True)]) - with app.test_request_context("/?geo=state:AK"): - self.assertEqual(parse_geo_arg(), [GeoSet("state", ["ak"])]) + with app.test_request_context("/?geo=fips:*"): + self.assertEqual(parse_geo_arg(), [GeoSet("fips", True)]) + with app.test_request_context(f"/?geo=fips:{FIPS[0]}"): + self.assertEqual(parse_geo_arg(), [GeoSet("fips", [FIPS[0]])]) with self.subTest("single list"): - with app.test_request_context("/?geo=state:AK,TK"): - self.assertEqual(parse_geo_arg(), [GeoSet("state", ["ak", "tk"])]) + with app.test_request_context(f"/?geo=fips:{FIPS[0]},{FIPS[1]}"): + self.assertEqual(parse_geo_arg(), [GeoSet("fips", [FIPS[0], FIPS[1]])]) with self.subTest("multi"): - with app.test_request_context("/?geo=state:*;nation:*"): - self.assertEqual(parse_geo_arg(), [GeoSet("state", True), GeoSet("nation", True)]) - with app.test_request_context("/?geo=state:AK;nation:US"): + with app.test_request_context("/?geo=fips:*;msa:*"): + self.assertEqual(parse_geo_arg(), [GeoSet("fips", True), GeoSet("msa", True)]) + with app.test_request_context(f"/?geo=fips:{FIPS[0]};msa:{MSA[0]}"): self.assertEqual( parse_geo_arg(), - [GeoSet("state", ["ak"]), GeoSet("nation", ["us"])], + [GeoSet("fips", [FIPS[0]]), GeoSet("msa", [MSA[0]])], ) - with app.test_request_context("/?geo=state:AK;state:KY"): + with app.test_request_context(f"/?geo=fips:{FIPS[0]};fips:{FIPS[1]}"): self.assertEqual( parse_geo_arg(), - [GeoSet("state", ["ak"]), GeoSet("state", ["ky"])], + [GeoSet("fips", [FIPS[0]]), GeoSet("fips", [FIPS[1]])], ) with self.subTest("multi list"): - with app.test_request_context("/?geo=state:AK,TK;county:42003,40556"): + with app.test_request_context(f"/?geo=fips:{FIPS[0]},{FIPS[1]};msa:{MSA[0]},{MSA[1]}"): self.assertEqual( parse_geo_arg(), [ - GeoSet("state", ["ak", "tk"]), - GeoSet("county", ["42003", "40556"]), + GeoSet("fips", [FIPS[0], FIPS[1]]), + GeoSet("msa", [MSA[0], MSA[1]]), ], ) with self.subTest("hybrid"): - with app.test_request_context("/?geo=nation:*;state:PA;county:42003,42002"): + with app.test_request_context(f"/?geo=nation:*;fips:{FIPS[0]};msa:{MSA[0]},{MSA[1]}"): self.assertEqual( parse_geo_arg(), [ GeoSet("nation", True), - GeoSet("state", ["pa"]), - GeoSet("county", ["42003", "42002"]), + GeoSet("fips", [FIPS[0]]), + GeoSet("msa", [MSA[0], MSA[1]]), ], ) @@ -140,10 +141,10 @@ def test_single_parse_geo_arg(self): with app.test_request_context("/"): self.assertRaises(ValidationFailedException, parse_single_geo_arg, "geo") with self.subTest("single"): - with app.test_request_context("/?geo=state:AK"): - self.assertEqual(parse_single_geo_arg("geo"), GeoSet("state", ["ak"])) + with app.test_request_context(f"/?geo=fips:{FIPS[0]}"): + self.assertEqual(parse_single_geo_arg("geo"), GeoSet("fips", [FIPS[0]])) with self.subTest("single list"): - with app.test_request_context("/?geo=state:AK,TK"): + with app.test_request_context(f"/?geo=fips:{FIPS[0]},{FIPS[1]}"): self.assertRaises(ValidationFailedException, parse_single_geo_arg, "geo") with self.subTest("multi"): with app.test_request_context("/?geo=state:*;nation:*"): diff --git a/tests/server/test_query.py b/tests/server/test_query.py index 53aca5621..ec07d3e8b 100644 --- a/tests/server/test_query.py +++ b/tests/server/test_query.py @@ -21,6 +21,7 @@ TimeSet, SourceSignalSet, ) +from delphi.epidata.acquisition.covidcast.test_utils import FIPS, MSA # py3tester coverage target __test_target__ = "delphi.epidata.server._query" @@ -145,17 +146,17 @@ def test_filter_geo_sets(self): with self.subTest("single"): params = {} self.assertEqual( - filter_geo_sets("t", "v", [GeoSet("state", ["KY"])], "p", params), + filter_geo_sets("t", "v", [GeoSet("fips", [FIPS[0]])], "p", params), "((t = :p_0t AND (v = :p_0t_0)))", ) - self.assertEqual(params, {"p_0t": "state", "p_0t_0": "KY"}) + self.assertEqual(params, {"p_0t": "fips", "p_0t_0": FIPS[0]}) with self.subTest("multi"): params = {} self.assertEqual( - filter_geo_sets("t", "v", [GeoSet("state", ["KY", "AK"])], "p", params), + filter_geo_sets("t", "v", [GeoSet("fips", [FIPS[0], FIPS[1]])], "p", params), "((t = :p_0t AND (v = :p_0t_0 OR v = :p_0t_1)))", ) - self.assertEqual(params, {"p_0t": "state", "p_0t_0": "KY", "p_0t_1": "AK"}) + self.assertEqual(params, {"p_0t": "fips", "p_0t_0": FIPS[0], "p_0t_1": FIPS[1]}) with self.subTest("multiple pairs"): params = {} self.assertEqual( @@ -175,7 +176,7 @@ def test_filter_geo_sets(self): filter_geo_sets( "t", "v", - [GeoSet("state", ["AK"]), GeoSet("nation", ["US"])], + [GeoSet("fips", [FIPS[0]]), GeoSet("msa", [MSA[0]])], "p", params, ), @@ -183,7 +184,7 @@ def test_filter_geo_sets(self): ) self.assertEqual( params, - {"p_0t": "state", "p_0t_0": "AK", "p_1t": "nation", "p_1t_0": "US"}, + {"p_0t": "fips", "p_0t_0": FIPS[0], "p_1t": "msa", "p_1t_0": MSA[0]}, ) def test_filter_source_signal_sets(self):