Skip to content

Leonlu2/error on geo values #1095

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
Mar 29, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
dd8c5c8
1. add geo_value validator in _params
LeonLu2 Feb 20, 2023
d024995
1. minor comment change
LeonLu2 Feb 23, 2023
a6cc058
1. Merge branch 'dev' into leonlu2/error_on_geo_values.
LeonLu2 Feb 23, 2023
6ed27d9
Update integrations/server/test_covidcast.py
LeonLu2 Feb 24, 2023
e89692e
Update integrations/server/test_covidcast.py
LeonLu2 Feb 24, 2023
14463c8
Update integrations/client/test_delphi_epidata.py
LeonLu2 Mar 2, 2023
3648a74
1. put the geo validation in GeoSet class constructor, and show which…
LeonLu2 Mar 3, 2023
e884b7c
Merge branch 'leonlu2/error_on_geo_values' of https://github.com/cmu-…
LeonLu2 Mar 3, 2023
9fe6196
Update src/server/_params.py
LeonLu2 Mar 20, 2023
6dac204
Update src/server/_params.py
LeonLu2 Mar 20, 2023
7b7b5e7
Update src/acquisition/covidcast/test_utils.py
LeonLu2 Mar 20, 2023
0a2bdf9
Update integrations/client/test_delphi_epidata.py
LeonLu2 Mar 20, 2023
ef362d8
Update integrations/server/test_covidcast.py
LeonLu2 Mar 20, 2023
6e29946
Update integrations/server/test_covidcast.py
LeonLu2 Mar 20, 2023
e3fe68b
Update tests/server/test_params.py
LeonLu2 Mar 20, 2023
43a232e
1. minor changes
LeonLu2 Mar 20, 2023
e3054cd
Merge branch 'dev' into leonlu2/error_on_geo_values
LeonLu2 Mar 20, 2023
1e75209
1. edit geo_type to match with what we have in make_default_row
LeonLu2 Mar 20, 2023
5fed93c
Merge branch 'dev' into leonlu2/error_on_geo_values
LeonLu2 Mar 20, 2023
79cdc26
Update integrations/server/test_covidcast.py
LeonLu2 Mar 21, 2023
bec5621
Update integrations/server/test_covidcast.py
LeonLu2 Mar 21, 2023
4578420
Update tests/server/test_params.py
LeonLu2 Mar 21, 2023
4a59ead
Update tests/server/test_params.py
LeonLu2 Mar 21, 2023
00b4ff2
Update tests/server/test_params.py
LeonLu2 Mar 21, 2023
58fcc7f
Update tests/server/test_params.py
LeonLu2 Mar 21, 2023
94e8b5f
Update tests/server/test_params.py
LeonLu2 Mar 21, 2023
59ce3c9
Update tests/server/test_params.py
LeonLu2 Mar 21, 2023
d41eeaa
Update tests/server/test_params.py
LeonLu2 Mar 21, 2023
b01bb82
Update test_covidcast.py
LeonLu2 Mar 21, 2023
e2d12ee
Update integrations/server/test_covidcast.py
LeonLu2 Mar 22, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 17 additions & 15 deletions integrations/client/test_delphi_epidata.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,10 @@
# third party
import delphi.operations.secrets as secrets
from delphi.epidata.acquisition.covidcast.covidcast_meta_cache_updater import main as update_covidcast_meta_cache
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow, FIPS, MSA
from delphi.epidata.client.delphi_epidata import Epidata
from delphi_utils import Nans


# py3tester coverage target
__test_target__ = 'delphi.epidata.client.delphi_epidata'
# all the Nans we use here are just one value, so this is a shortcut to it:
Expand Down Expand Up @@ -219,10 +218,10 @@ def test_geo_value(self):
# insert placeholder data: three counties, three MSAs
N = 3
rows = [
CovidcastTestRow.make_default_row(geo_type="county", geo_value=str(i)*5, value=i)
CovidcastTestRow.make_default_row(geo_type="fips", geo_value=FIPS[i], value=i)
for i in range(N)
] + [
CovidcastTestRow.make_default_row(geo_type="msa", geo_value=str(i)*5, value=i*10)
CovidcastTestRow.make_default_row(geo_type="msa", geo_value=MSA[i], value=i*10)
for i in range(N)
]
self._insert_rows(rows)
Expand All @@ -236,31 +235,34 @@ def fetch(geo):
**self.params_from_row(rows[0], geo_value=geo)
)

self.maxDiff = None
# test fetch all
request = fetch('*')
self.assertEqual(request['message'], 'success')
self.assertEqual(request['epidata'], counties)
# test fetch a specific region
request = fetch('11111')
request = fetch([FIPS[0]])
self.assertEqual(request['message'], 'success')
self.assertEqual(request['epidata'], [counties[1]])
self.assertEqual(request['epidata'], [counties[0]])
# test fetch a specific yet not existing region
request = fetch('55555')
self.assertEqual(request['message'], 'no results')
self.assertEqual(request['message'], 'Invalid geo_value(s) 55555 for the requested geo_type fips')
# test fetch a multiple regions
request = fetch(['11111', '22222'])
request = fetch([FIPS[0], FIPS[1]])
self.assertEqual(request['message'], 'success')
self.assertEqual(request['epidata'], [counties[1], counties[2]])
self.assertEqual(request['epidata'], [counties[0], counties[1]])
# test fetch a multiple regions in another variant
request = fetch(['00000', '22222'])
request = fetch([FIPS[0], FIPS[2]])
self.assertEqual(request['message'], 'success')
self.assertEqual(request['epidata'], [counties[0], counties[2]])
# test fetch a multiple regions but one is not existing
request = fetch(['11111', '55555'])
self.assertEqual(request['message'], 'success')
self.assertEqual(request['epidata'], [counties[1]])
request = fetch([FIPS[0], '55555'])
self.assertEqual(request['message'], 'Invalid geo_value(s) 55555 for the requested geo_type fips')
# test fetch a multiple regions but specify no region
request = fetch([])
self.assertEqual(request['message'], 'geo_value is empty for the requested geo_type fips!')
# test fetch a region with no results
request = fetch([FIPS[3]])
self.assertEqual(request['message'], 'no results')

def test_covidcast_meta(self):
Expand Down Expand Up @@ -325,10 +327,10 @@ def test_async_epidata(self):
# insert placeholder data: three counties, three MSAs
N = 3
rows = [
CovidcastTestRow.make_default_row(geo_type="county", geo_value=str(i)*5, value=i)
CovidcastTestRow.make_default_row(geo_type="fips", geo_value=FIPS[i-1], value=i)
for i in range(N)
] + [
CovidcastTestRow.make_default_row(geo_type="msa", geo_value=str(i)*5, value=i*10)
CovidcastTestRow.make_default_row(geo_type="msa", geo_value=MSA[i-1], value=i*10)
for i in range(N)
]
self._insert_rows(rows)
Expand Down
33 changes: 17 additions & 16 deletions integrations/server/test_covidcast.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

# first party
from delphi_utils import Nans
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow, FIPS, MSA
from delphi.epidata.client.delphi_epidata import Epidata

# use the local instance of the Epidata API
Expand All @@ -37,23 +37,22 @@ def _insert_placeholder_set_one(self):

def _insert_placeholder_set_two(self):
rows = [
CovidcastTestRow.make_default_row(geo_type='county', geo_value=str(i)*5, value=i*1., stderr=i*10., sample_size=i*100.)
CovidcastTestRow.make_default_row(geo_type='msa', geo_value=MSA[i-1], value=i*1., stderr=i*10., sample_size=i*100.)
for i in [1, 2, 3]
] + [
# geo value intended to overlap with counties above
CovidcastTestRow.make_default_row(geo_type='msa', geo_value=str(i-3)*5, value=i*1., stderr=i*10., sample_size=i*100.)
CovidcastTestRow.make_default_row(geo_type='fips', geo_value=FIPS[i-4], value=i*1., stderr=i*10., sample_size=i*100.)
for i in [4, 5, 6]
]
self._insert_rows(rows)
return rows

def _insert_placeholder_set_three(self):
rows = [
CovidcastTestRow.make_default_row(geo_type='county', geo_value='11111', time_value=2000_01_01+i, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03, lag=2-i)
CovidcastTestRow.make_default_row(geo_value=MSA[0], time_value=2000_01_01+i, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03, lag=2-i)
for i in [1, 2, 3]
] + [
# time value intended to overlap with 11111 above, with disjoint geo values
CovidcastTestRow.make_default_row(geo_type='county', geo_value=str(i)*5, time_value=2000_01_01+i-3, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03, lag=5-i)
# time value intended to overlap with the time values above, with disjoint geo values
CovidcastTestRow.make_default_row(geo_value=str(i)*5, time_value=2000_01_01+i-3, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03, lag=5-i)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it your intention to make rows with invalid geo_values here? maybe you want to use some from FIPS instead?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why dont we see the "invalid geo" messages when we use this row set?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh sorry that I overlooked these comments, somehow it did not show on the conversation page. Yeah I think using some FIPS here would be good.
I think we don't see "invalid geo" because the make_default_row(base function here) does not go through the geo validator in the server maybe?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make_default_row() doesnt check them, youre right. it looks like whenever sample rows from _insert_placeholder_set_three are used, certain subsets of them are isolated and sometimes the geo_values are changed before theyre passed to request_based_on_row() where they are actually checked for geo validation, and bad geo_values were never sent in a request.

for i in [4, 5, 6]
]
self._insert_rows(rows)
Expand Down Expand Up @@ -295,7 +294,7 @@ def test_signal_wildcard(self):
})

def test_geo_value(self):
"""test different variants of geo types: single, *, multi."""
"""test whether geo values are valid for specific geo types"""

# insert placeholder data
rows = self._insert_placeholder_set_two()
Expand All @@ -308,26 +307,28 @@ def fetch(geo_value):
return response

# test fetch a specific region
r = fetch('11111')
r = fetch(MSA[0])
self.assertEqual(r['message'], 'success')
self.assertEqual(r['epidata'], expected[0:1])
# test fetch a specific yet not existing region
r = fetch('55555')
self.assertEqual(r['message'], 'no results')
r = fetch('11111')
self.assertEqual(r['message'], 'Invalid geo_value(s) 11111 for the requested geo_type msa')
# test fetch multiple regions
r = fetch('11111,22222')
r = fetch('{},{}'.format(MSA[0], MSA[1]))
self.assertEqual(r['message'], 'success')
self.assertEqual(r['epidata'], expected[0:2])
# test fetch multiple noncontiguous regions
r = fetch('11111,33333')
r = fetch('{},{}'.format(MSA[0], MSA[2]))
self.assertEqual(r['message'], 'success')
self.assertEqual(r['epidata'], [expected[0], expected[2]])
# test fetch multiple regions but one is not existing
r = fetch('11111,55555')
self.assertEqual(r['message'], 'success')
self.assertEqual(r['epidata'], expected[0:1])
r = fetch('{},11111'.format(MSA[0]))
self.assertEqual(r['message'], 'Invalid geo_value(s) 11111 for the requested geo_type msa')
# test fetch empty region
r = fetch('')
self.assertEqual(r['message'], 'geo_value is empty for the requested geo_type msa!')
# test a region that has no results
r = fetch(MSA[3])
self.assertEqual(r['message'], 'no results')

def test_location_timeline(self):
Expand Down
1 change: 1 addition & 0 deletions requirements.api.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
delphi_utils==0.3.6
epiweeks==2.1.2
Flask==2.2.2
itsdangerous<2.1
Expand Down
9 changes: 7 additions & 2 deletions src/acquisition/covidcast/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
# all the Nans we use here are just one value, so this is a shortcut to it:
nmv = Nans.NOT_MISSING.value

# TODO replace these real geo_values with fake values, and use patch and mock to mock the return values of
# delphi_utils.geomap.GeoMapper().get_geo_values(geo_type) in parse_geo_sets() of _params.py

FIPS = ['04019', '19143', '29063', '36083'] # Example list of valid FIPS codes as strings
MSA = ['40660', '44180', '48620', '49420'] # Example list of valid MSA codes as strings

class CovidcastTestRow(CovidcastRow):
@staticmethod
Expand All @@ -22,9 +27,9 @@ def make_default_row(**kwargs) -> "CovidcastTestRow":
"source": "src",
"signal": "sig",
"time_type": "day",
"geo_type": "county",
"geo_type": "msa",
"time_value": 2020_02_02,
"geo_value": "01234",
"geo_value": "40660", # a valid geo_value for msa
"value": 10.0,
"stderr": 10.0,
"sample_size": 10.0,
Expand Down
19 changes: 19 additions & 0 deletions src/server/_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import re
from dataclasses import dataclass
from typing import List, Optional, Sequence, Tuple, Union
import delphi_utils

from flask import request

Expand Down Expand Up @@ -53,6 +54,17 @@ class GeoSet:
geo_type: str
geo_values: Union[bool, Sequence[str]]

def __init__(self, geo_type: str, geo_values: Union[bool, Sequence[str]]):
if not isinstance(geo_values, bool):
if geo_values == ['']:
raise ValidationFailedException(f"geo_value is empty for the requested geo_type {geo_type}!")
allowed_values = delphi_utils.geomap.GeoMapper().get_geo_values(geo_type)
invalid_values = set(geo_values) - set(allowed_values)
if invalid_values:
raise ValidationFailedException(f"Invalid geo_value(s) {', '.join(invalid_values)} for the requested geo_type {geo_type}")
self.geo_type = geo_type
self.geo_values = geo_values

def matches(self, geo_type: str, geo_value: str) -> bool:
return self.geo_type == geo_type and (self.geo_values is True or (not isinstance(self.geo_values, bool) and geo_value in self.geo_values))

Expand Down Expand Up @@ -460,14 +472,21 @@ def parse_source_signal_sets() -> List[SourceSignalSet]:

def parse_geo_sets() -> List[GeoSet]:
geo_type = request.values.get("geo_type")

if geo_type:
# old version
require_any(request, "geo_value", "geo_values", empty=True)
geo_values = extract_strings(("geo_values", "geo_value"))
if len(geo_values) == 1 and geo_values[0] == "*":
return [GeoSet(geo_type, True)]

# for geo_value in geo_values:
# if geo_value not in delphi_utils.geomap.GeoMapper().get_geo_values(geo_type):
# raise ValidationFailedException("invalid geo_value for the requested geo_type")

return [GeoSet(geo_type, geo_values)]


if ":" not in request.values.get("geo", ""):
raise ValidationFailedException("missing parameter: geo or (geo_type and geo_value[s])")

Expand Down
4 changes: 2 additions & 2 deletions tests/acquisition/covidcast/test_covidcast_row.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ class TestCovidcastRows(unittest.TestCase):
"source": ["src"] * 10,
"signal": ["sig_base"] * 5 + ["sig_other"] * 5,
"time_type": ["day"] * 10,
"geo_type": ["county"] * 10,
"geo_type": ["msa"] * 10,
"time_value": [2021_05_01 + i for i in range(5)] * 2,
"geo_value": ["01234"] * 10,
"geo_value": ["40660"] * 10,
"value": range(10),
"stderr": [10.0] * 10,
"sample_size": [10.0] * 10,
Expand Down
57 changes: 29 additions & 28 deletions tests/server/test_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from delphi.epidata.server._exceptions import (
ValidationFailedException,
)
from delphi.epidata.acquisition.covidcast.test_utils import FIPS, MSA

# py3tester coverage target
__test_target__ = "delphi.epidata.server._params"
Expand All @@ -45,19 +46,19 @@ def setUp(self):

def test_geo_set(self):
with self.subTest("*"):
p = GeoSet("hrr", True)
self.assertTrue(p.matches("hrr", "any"))
p = GeoSet("fips", True)
self.assertTrue(p.matches("fips", "any"))
self.assertFalse(p.matches("msa", "any"))
with self.subTest("subset"):
p = GeoSet("hrr", ["a", "b"])
self.assertTrue(p.matches("hrr", "a"))
self.assertTrue(p.matches("hrr", "b"))
self.assertFalse(p.matches("hrr", "c"))
p = GeoSet("fips", [FIPS[0], FIPS[1]])
self.assertTrue(p.matches("fips", FIPS[0]))
self.assertTrue(p.matches("fips", FIPS[1]))
self.assertFalse(p.matches("fips", "c"))
self.assertFalse(p.matches("msa", "any"))
with self.subTest("count"):
self.assertEqual(GeoSet("a", True).count(), inf)
self.assertEqual(GeoSet("a", False).count(), 0)
self.assertEqual(GeoSet("a", ["a", "b"]).count(), 2)
self.assertEqual(GeoSet("fips", [FIPS[0], FIPS[1]]).count(), 2)

def test_source_signal_set(self):
with self.subTest("*"):
Expand Down Expand Up @@ -89,43 +90,43 @@ def test_parse_geo_arg(self):
with app.test_request_context("/"):
self.assertEqual(parse_geo_arg(), [])
with self.subTest("single"):
with app.test_request_context("/?geo=state:*"):
self.assertEqual(parse_geo_arg(), [GeoSet("state", True)])
with app.test_request_context("/?geo=state:AK"):
self.assertEqual(parse_geo_arg(), [GeoSet("state", ["ak"])])
with app.test_request_context("/?geo=fips:*"):
self.assertEqual(parse_geo_arg(), [GeoSet("fips", True)])
with app.test_request_context("/?geo=fips:{}".format(FIPS[0])):
self.assertEqual(parse_geo_arg(), [GeoSet("fips", [FIPS[0]])])
with self.subTest("single list"):
with app.test_request_context("/?geo=state:AK,TK"):
self.assertEqual(parse_geo_arg(), [GeoSet("state", ["ak", "tk"])])
with app.test_request_context("/?geo=fips:{},{}".format(FIPS[0], FIPS[1])):
self.assertEqual(parse_geo_arg(), [GeoSet("fips", [FIPS[0], FIPS[1]])])
with self.subTest("multi"):
with app.test_request_context("/?geo=state:*;nation:*"):
self.assertEqual(parse_geo_arg(), [GeoSet("state", True), GeoSet("nation", True)])
with app.test_request_context("/?geo=state:AK;nation:US"):
with app.test_request_context("/?geo=fips:*;msa:*"):
self.assertEqual(parse_geo_arg(), [GeoSet("fips", True), GeoSet("msa", True)])
with app.test_request_context("/?geo=fips:{};msa:{}".format(FIPS[0], MSA[0])):
self.assertEqual(
parse_geo_arg(),
[GeoSet("state", ["ak"]), GeoSet("nation", ["us"])],
[GeoSet("fips", [FIPS[0]]), GeoSet("msa", [MSA[0]])],
)
with app.test_request_context("/?geo=state:AK;state:KY"):
with app.test_request_context("/?geo=fips:{};fips:{}".format(FIPS[0], FIPS[1])):
self.assertEqual(
parse_geo_arg(),
[GeoSet("state", ["ak"]), GeoSet("state", ["ky"])],
[GeoSet("fips", [FIPS[0]]), GeoSet("fips", [FIPS[1]])],
)
with self.subTest("multi list"):
with app.test_request_context("/?geo=state:AK,TK;county:42003,40556"):
with app.test_request_context("/?geo=fips:{},{};msa:{},{}".format(FIPS[0], FIPS[1], MSA[0], MSA[1])):
self.assertEqual(
parse_geo_arg(),
[
GeoSet("state", ["ak", "tk"]),
GeoSet("county", ["42003", "40556"]),
GeoSet("fips", [FIPS[0], FIPS[1]]),
GeoSet("msa", [MSA[0], MSA[1]]),
],
)
with self.subTest("hybrid"):
with app.test_request_context("/?geo=nation:*;state:PA;county:42003,42002"):
with app.test_request_context("/?geo=nation:*;fips:{};msa:{},{}".format(FIPS[0], MSA[0], MSA[1])):
self.assertEqual(
parse_geo_arg(),
[
GeoSet("nation", True),
GeoSet("state", ["pa"]),
GeoSet("county", ["42003", "42002"]),
GeoSet("fips", [FIPS[0]]),
GeoSet("msa", [MSA[0], MSA[1]]),
],
)

Expand All @@ -140,10 +141,10 @@ def test_single_parse_geo_arg(self):
with app.test_request_context("/"):
self.assertRaises(ValidationFailedException, parse_single_geo_arg, "geo")
with self.subTest("single"):
with app.test_request_context("/?geo=state:AK"):
self.assertEqual(parse_single_geo_arg("geo"), GeoSet("state", ["ak"]))
with app.test_request_context("/?geo=fips:{}".format(FIPS[0])):
self.assertEqual(parse_single_geo_arg("geo"), GeoSet("fips", [FIPS[0]]))
with self.subTest("single list"):
with app.test_request_context("/?geo=state:AK,TK"):
with app.test_request_context("/?geo=fips:{},{}".format(FIPS[0], FIPS[1])):
self.assertRaises(ValidationFailedException, parse_single_geo_arg, "geo")
with self.subTest("multi"):
with app.test_request_context("/?geo=state:*;nation:*"):
Expand Down
Loading