Skip to content

Commit ad32bd3

Browse files
committed
Add tests for geo_id value checks
1 parent 45046cb commit ad32bd3

File tree

1 file changed

+89
-8
lines changed

1 file changed

+89
-8
lines changed

validator/tests/test_checks.py

Lines changed: 89 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -146,21 +146,21 @@ def test_expected_groups(self):
146146
assert pattern_found["signal"] == "signal_signal"
147147

148148

149-
class TestCheckBadGeoId:
149+
class TestCheckBadGeoIdFormat:
150150
params = {"data_source": "", "span_length": 0,
151151
"end_date": "2020-09-02", "expected_lag": {}}
152152

153153
def test_empty_df(self):
154154
validator = Validator(self.params)
155155
empty_df = pd.DataFrame(columns=["geo_id"], dtype=str)
156-
validator.check_bad_geo_id(empty_df, "name", "county")
156+
validator.check_bad_geo_id_format(empty_df, "name", "county")
157157

158158
assert len(validator.raised_errors) == 0
159159

160160
def test_invalid_geo_type(self):
161161
validator = Validator(self.params)
162162
empty_df = pd.DataFrame(columns=["geo_id"], dtype=str)
163-
validator.check_bad_geo_id(empty_df, "name", "hello")
163+
validator.check_bad_geo_id_format(empty_df, "name", "hello")
164164

165165
assert len(validator.raised_errors) == 1
166166
assert "check_geo_type" in [
@@ -173,7 +173,7 @@ def test_invalid_geo_id_county(self):
173173
validator = Validator(self.params)
174174
df = pd.DataFrame(["0", "54321", "123", ".0000",
175175
"abc12"], columns=["geo_id"])
176-
validator.check_bad_geo_id(df, "name", "county")
176+
validator.check_bad_geo_id_format(df, "name", "county")
177177

178178
assert len(validator.raised_errors) == 1
179179
assert "check_geo_id_format" in validator.raised_errors[0].check_data_id
@@ -184,7 +184,7 @@ def test_invalid_geo_id_msa(self):
184184
validator = Validator(self.params)
185185
df = pd.DataFrame(["0", "54321", "123", ".0000",
186186
"abc12"], columns=["geo_id"])
187-
validator.check_bad_geo_id(df, "name", "msa")
187+
validator.check_bad_geo_id_format(df, "name", "msa")
188188

189189
assert len(validator.raised_errors) == 1
190190
assert "check_geo_id_format" in validator.raised_errors[0].check_data_id
@@ -195,7 +195,7 @@ def test_invalid_geo_id_hrr(self):
195195
validator = Validator(self.params)
196196
df = pd.DataFrame(["1", "12", "123", "1234", "12345",
197197
"a", ".", "ab1"], columns=["geo_id"])
198-
validator.check_bad_geo_id(df, "name", "hrr")
198+
validator.check_bad_geo_id_format(df, "name", "hrr")
199199

200200
assert len(validator.raised_errors) == 1
201201
assert "check_geo_id_format" in validator.raised_errors[0].check_data_id
@@ -208,7 +208,7 @@ def test_invalid_geo_id_state(self):
208208
validator = Validator(self.params)
209209
df = pd.DataFrame(["aa", "hi", "HI", "hawaii",
210210
"Hawaii", "a", "H.I."], columns=["geo_id"])
211-
validator.check_bad_geo_id(df, "name", "state")
211+
validator.check_bad_geo_id_format(df, "name", "state")
212212

213213
assert len(validator.raised_errors) == 1
214214
assert "check_geo_id_format" in validator.raised_errors[0].check_data_id
@@ -221,7 +221,7 @@ def test_invalid_geo_id_national(self):
221221
validator = Validator(self.params)
222222
df = pd.DataFrame(["usa", "SP", " us", "us",
223223
"usausa", "US"], columns=["geo_id"])
224-
validator.check_bad_geo_id(df, "name", "national")
224+
validator.check_bad_geo_id_format(df, "name", "national")
225225

226226
assert len(validator.raised_errors) == 1
227227
assert "check_geo_id_format" in validator.raised_errors[0].check_data_id
@@ -230,6 +230,87 @@ def test_invalid_geo_id_national(self):
230230
assert "US" not in validator.raised_errors[0].expression
231231
assert "SP" not in validator.raised_errors[0].expression
232232

233+
class TestCheckBadGeoIdValue:
    """Exercise ``Validator.check_bad_geo_id_value`` for several geo types.

    Every test hands the check a single-column ``geo_id`` frame and then
    inspects what was appended to ``raised_errors`` / ``raised_warnings``.
    """

    # NOTE(review): the county and msa tests expect the identifier
    # "check_bad_geo_id_value", while the hrr, state and national tests expect
    # "check_geo_id_value" -- confirm which one the validator actually emits.
    params = {
        "data_source": "",
        "span_length": 0,
        "end_date": "2020-09-02",
        "expected_lag": {},
    }

    def _run_value_check(self, frame, geo_type):
        """Run the value check on ``frame`` and return the validator used."""
        checker = Validator(self.params)
        checker.check_bad_geo_id_value(frame, "name", geo_type)
        return checker

    def test_empty_df(self):
        """A frame with no rows is expected to raise no errors."""
        no_rows = pd.DataFrame(columns=["geo_id"], dtype=str)
        checker = self._run_value_check(no_rows, "county")
        assert len(checker.raised_errors) == 0

    def test_invalid_geo_id_county(self):
        """Expect one error listing "88888" and "99999" but not "01001"."""
        frame = pd.DataFrame(["01001", "88888", "99999"], columns=["geo_id"])
        checker = self._run_value_check(frame, "county")

        assert len(checker.raised_errors) == 1
        failure = checker.raised_errors[0]
        assert "check_bad_geo_id_value" in failure.check_data_id
        assert len(failure.expression) == 2
        assert "01001" not in failure.expression
        for rejected in ("88888", "99999"):
            assert rejected in failure.expression

    def test_invalid_geo_id_msa(self):
        """Expect one error listing "88888" and "99999" but not "10180"."""
        frame = pd.DataFrame(["10180", "88888", "99999"], columns=["geo_id"])
        checker = self._run_value_check(frame, "msa")

        assert len(checker.raised_errors) == 1
        failure = checker.raised_errors[0]
        assert "check_bad_geo_id_value" in failure.check_data_id
        assert len(failure.expression) == 2
        assert "10180" not in failure.expression
        for rejected in ("88888", "99999"):
            assert rejected in failure.expression

    def test_invalid_geo_id_hrr(self):
        """Expect one error listing "8", "88", "888" but none of the 1-ids."""
        frame = pd.DataFrame(
            ["1", "11", "111", "8", "88", "888"], columns=["geo_id"])
        checker = self._run_value_check(frame, "hrr")

        assert len(checker.raised_errors) == 1
        failure = checker.raised_errors[0]
        assert "check_geo_id_value" in failure.check_data_id
        assert len(failure.expression) == 3
        for accepted in ("1", "11", "111"):
            assert accepted not in failure.expression
        for rejected in ("8", "88", "888"):
            assert rejected in failure.expression

    def test_invalid_geo_id_state(self):
        """Expect one error listing "aa" but not "ak"."""
        frame = pd.DataFrame(["aa", "ak"], columns=["geo_id"])
        checker = self._run_value_check(frame, "state")

        assert len(checker.raised_errors) == 1
        failure = checker.raised_errors[0]
        assert "check_geo_id_value" in failure.check_data_id
        assert len(failure.expression) == 1
        assert "ak" not in failure.expression
        assert "aa" in failure.expression

    def test_uppercase_geo_id(self):
        """Expect no error, and one lowercase warning that lists "AK"."""
        frame = pd.DataFrame(["ak", "AK"], columns=["geo_id"])
        checker = self._run_value_check(frame, "state")

        assert len(checker.raised_errors) == 0
        assert len(checker.raised_warnings) == 1
        notice = checker.raised_warnings[0]
        assert "check_geo_id_lowercase" in notice.check_data_id
        assert "AK" in notice.expression

    def test_invalid_geo_id_national(self):
        """Expect one error listing "zz" but not "us"."""
        frame = pd.DataFrame(["us", "zz"], columns=["geo_id"])
        checker = self._run_value_check(frame, "national")

        assert len(checker.raised_errors) == 1
        failure = checker.raised_errors[0]
        assert "check_geo_id_value" in failure.check_data_id
        assert len(failure.expression) == 1
        assert "us" not in failure.expression
        assert "zz" in failure.expression
313+
233314

234315
class TestCheckBadVal:
235316
params = {"data_source": "", "span_length": 1,

0 commit comments

Comments
 (0)