@@ -146,21 +146,21 @@ def test_expected_groups(self):
146
146
assert pattern_found ["signal" ] == "signal_signal"
147
147
148
148
149
- class TestCheckBadGeoId :
149
+ class TestCheckBadGeoIdFormat :
150
150
params = {"data_source" : "" , "span_length" : 0 ,
151
151
"end_date" : "2020-09-02" , "expected_lag" : {}}
152
152
153
153
def test_empty_df (self ):
154
154
validator = Validator (self .params )
155
155
empty_df = pd .DataFrame (columns = ["geo_id" ], dtype = str )
156
- validator .check_bad_geo_id (empty_df , "name" , "county" )
156
+ validator .check_bad_geo_id_format (empty_df , "name" , "county" )
157
157
158
158
assert len (validator .raised_errors ) == 0
159
159
160
160
def test_invalid_geo_type (self ):
161
161
validator = Validator (self .params )
162
162
empty_df = pd .DataFrame (columns = ["geo_id" ], dtype = str )
163
- validator .check_bad_geo_id (empty_df , "name" , "hello" )
163
+ validator .check_bad_geo_id_format (empty_df , "name" , "hello" )
164
164
165
165
assert len (validator .raised_errors ) == 1
166
166
assert "check_geo_type" in [
@@ -173,7 +173,7 @@ def test_invalid_geo_id_county(self):
173
173
validator = Validator (self .params )
174
174
df = pd .DataFrame (["0" , "54321" , "123" , ".0000" ,
175
175
"abc12" ], columns = ["geo_id" ])
176
- validator .check_bad_geo_id (df , "name" , "county" )
176
+ validator .check_bad_geo_id_format (df , "name" , "county" )
177
177
178
178
assert len (validator .raised_errors ) == 1
179
179
assert "check_geo_id_format" in validator .raised_errors [0 ].check_data_id
@@ -184,7 +184,7 @@ def test_invalid_geo_id_msa(self):
184
184
validator = Validator (self .params )
185
185
df = pd .DataFrame (["0" , "54321" , "123" , ".0000" ,
186
186
"abc12" ], columns = ["geo_id" ])
187
- validator .check_bad_geo_id (df , "name" , "msa" )
187
+ validator .check_bad_geo_id_format (df , "name" , "msa" )
188
188
189
189
assert len (validator .raised_errors ) == 1
190
190
assert "check_geo_id_format" in validator .raised_errors [0 ].check_data_id
@@ -195,7 +195,7 @@ def test_invalid_geo_id_hrr(self):
195
195
validator = Validator (self .params )
196
196
df = pd .DataFrame (["1" , "12" , "123" , "1234" , "12345" ,
197
197
"a" , "." , "ab1" ], columns = ["geo_id" ])
198
- validator .check_bad_geo_id (df , "name" , "hrr" )
198
+ validator .check_bad_geo_id_format (df , "name" , "hrr" )
199
199
200
200
assert len (validator .raised_errors ) == 1
201
201
assert "check_geo_id_format" in validator .raised_errors [0 ].check_data_id
@@ -208,7 +208,7 @@ def test_invalid_geo_id_state(self):
208
208
validator = Validator (self .params )
209
209
df = pd .DataFrame (["aa" , "hi" , "HI" , "hawaii" ,
210
210
"Hawaii" , "a" , "H.I." ], columns = ["geo_id" ])
211
- validator .check_bad_geo_id (df , "name" , "state" )
211
+ validator .check_bad_geo_id_format (df , "name" , "state" )
212
212
213
213
assert len (validator .raised_errors ) == 1
214
214
assert "check_geo_id_format" in validator .raised_errors [0 ].check_data_id
@@ -221,7 +221,7 @@ def test_invalid_geo_id_national(self):
221
221
validator = Validator (self .params )
222
222
df = pd .DataFrame (["usa" , "SP" , " us" , "us" ,
223
223
"usausa" , "US" ], columns = ["geo_id" ])
224
- validator .check_bad_geo_id (df , "name" , "national" )
224
+ validator .check_bad_geo_id_format (df , "name" , "national" )
225
225
226
226
assert len (validator .raised_errors ) == 1
227
227
assert "check_geo_id_format" in validator .raised_errors [0 ].check_data_id
@@ -230,6 +230,87 @@ def test_invalid_geo_id_national(self):
230
230
assert "US" not in validator .raised_errors [0 ].expression
231
231
assert "SP" not in validator .raised_errors [0 ].expression
232
232
233
class TestCheckBadGeoIdValue:
    """Tests for ``Validator.check_bad_geo_id_value``.

    Every test hands the check a single ``geo_id`` column for one geo type
    and then inspects what was recorded on the validator's
    ``raised_errors`` / ``raised_warnings`` lists.
    """

    params = {
        "data_source": "",
        "span_length": 0,
        "end_date": "2020-09-02",
        "expected_lag": {},
    }

    def _run_check(self, frame, geo_type):
        """Run the geo_id value check on ``frame`` and return the validator.

        Not prefixed with ``test_`` so pytest does not collect it.
        """
        checker = Validator(self.params)
        checker.check_bad_geo_id_value(frame, "name", geo_type)
        return checker

    def test_empty_df(self):
        # A frame with no rows must not produce any error.
        no_rows = pd.DataFrame(columns=["geo_id"], dtype=str)
        checker = self._run_check(no_rows, "county")
        assert len(checker.raised_errors) == 0

    def test_invalid_geo_id_county(self):
        frame = pd.DataFrame({"geo_id": ["01001", "88888", "99999"]})
        checker = self._run_check(frame, "county")

        assert len(checker.raised_errors) == 1
        failure = checker.raised_errors[0]
        assert "check_bad_geo_id_value" in failure.check_data_id
        assert len(failure.expression) == 2
        assert "01001" not in failure.expression
        for unknown in ("88888", "99999"):
            assert unknown in failure.expression

    def test_invalid_geo_id_msa(self):
        frame = pd.DataFrame({"geo_id": ["10180", "88888", "99999"]})
        checker = self._run_check(frame, "msa")

        assert len(checker.raised_errors) == 1
        failure = checker.raised_errors[0]
        assert "check_bad_geo_id_value" in failure.check_data_id
        assert len(failure.expression) == 2
        assert "10180" not in failure.expression
        for unknown in ("88888", "99999"):
            assert unknown in failure.expression

    def test_invalid_geo_id_hrr(self):
        frame = pd.DataFrame(
            {"geo_id": ["1", "11", "111", "8", "88", "888"]})
        checker = self._run_check(frame, "hrr")

        assert len(checker.raised_errors) == 1
        failure = checker.raised_errors[0]
        # NOTE(review): the county/msa tests above expect the id
        # "check_bad_geo_id_value" while this test (and the state/national
        # tests) expect "check_geo_id_value" -- confirm which id the
        # validator actually emits and align the tests.
        assert "check_geo_id_value" in failure.check_data_id
        assert len(failure.expression) == 3
        for known in ("1", "11", "111"):
            assert known not in failure.expression
        for unknown in ("8", "88", "888"):
            assert unknown in failure.expression

    def test_invalid_geo_id_state(self):
        frame = pd.DataFrame({"geo_id": ["aa", "ak"]})
        checker = self._run_check(frame, "state")

        assert len(checker.raised_errors) == 1
        failure = checker.raised_errors[0]
        assert "check_geo_id_value" in failure.check_data_id
        assert len(failure.expression) == 1
        assert "ak" not in failure.expression
        assert "aa" in failure.expression

    def test_uppercase_geo_id(self):
        # Uppercase ids are expected to yield a warning, not an error.
        frame = pd.DataFrame({"geo_id": ["ak", "AK"]})
        checker = self._run_check(frame, "state")

        assert len(checker.raised_errors) == 0
        assert len(checker.raised_warnings) == 1
        notice = checker.raised_warnings[0]
        assert "check_geo_id_lowercase" in notice.check_data_id
        assert "AK" in notice.expression

    def test_invalid_geo_id_national(self):
        frame = pd.DataFrame({"geo_id": ["us", "zz"]})
        checker = self._run_check(frame, "national")

        assert len(checker.raised_errors) == 1
        failure = checker.raised_errors[0]
        assert "check_geo_id_value" in failure.check_data_id
        assert len(failure.expression) == 1
        assert "us" not in failure.expression
        assert "zz" in failure.expression
313
+
233
314
234
315
class TestCheckBadVal :
235
316
params = {"data_source" : "" , "span_length" : 1 ,
0 commit comments