9
9
10
10
from delphi_utils import create_export_csv , Nans
11
11
12
+
12
13
def _clean_directory (directory ):
13
14
"""Clean files out of a directory."""
14
15
for fname in listdir (directory ):
@@ -29,6 +30,7 @@ def _non_ignored_files_set(directory):
29
30
30
31
class TestExport :
31
32
"""Tests for exporting CSVs."""
33
+
32
34
# List of times for data points.
33
35
TIMES = [
34
36
datetime .strptime (x , "%Y-%m-%d" )
@@ -54,9 +56,19 @@ class TestExport:
54
56
"val" : [3.12345678910 , np .nan , 2.2 , 2.6 ],
55
57
"se" : [0.15 , 0.22 , np .nan , 0.34 ],
56
58
"sample_size" : [100 , 100 , 101 , None ],
57
- "missing_val" : [Nans .NOT_MISSING , Nans .OTHER , Nans .NOT_MISSING , Nans .NOT_MISSING ],
58
- "missing_se" : [Nans .NOT_MISSING , Nans .NOT_MISSING , Nans .OTHER , Nans .NOT_MISSING ],
59
- "missing_sample_size" : [Nans .NOT_MISSING ] * 3 + [Nans .OTHER ]
59
+ "missing_val" : [
60
+ Nans .NOT_MISSING ,
61
+ Nans .OTHER ,
62
+ Nans .NOT_MISSING ,
63
+ Nans .NOT_MISSING ,
64
+ ],
65
+ "missing_se" : [
66
+ Nans .NOT_MISSING ,
67
+ Nans .NOT_MISSING ,
68
+ Nans .OTHER ,
69
+ Nans .NOT_MISSING ,
70
+ ],
71
+ "missing_sample_size" : [Nans .NOT_MISSING ] * 3 + [Nans .OTHER ],
60
72
}
61
73
)
62
74
@@ -68,9 +80,19 @@ class TestExport:
68
80
"val" : [np .nan , np .nan , 2.2 , 2.6 ],
69
81
"se" : [0.15 , 0.22 , np .nan , 0.34 ],
70
82
"sample_size" : [100 , 100 , 101 , None ],
71
- "missing_val" : [Nans .NOT_MISSING , Nans .OTHER , Nans .NOT_MISSING , Nans .NOT_MISSING ],
72
- "missing_se" : [Nans .NOT_MISSING , Nans .NOT_MISSING , Nans .OTHER , Nans .NOT_MISSING ],
73
- "missing_sample_size" : [Nans .NOT_MISSING ] * 3 + [Nans .OTHER ]
83
+ "missing_val" : [
84
+ Nans .NOT_MISSING ,
85
+ Nans .OTHER ,
86
+ Nans .NOT_MISSING ,
87
+ Nans .NOT_MISSING ,
88
+ ],
89
+ "missing_se" : [
90
+ Nans .NOT_MISSING ,
91
+ Nans .NOT_MISSING ,
92
+ Nans .OTHER ,
93
+ Nans .NOT_MISSING ,
94
+ ],
95
+ "missing_sample_size" : [Nans .NOT_MISSING ] * 3 + [Nans .OTHER ],
74
96
}
75
97
)
76
98
@@ -116,10 +138,14 @@ def test_export_rounding(self):
116
138
)
117
139
pd .testing .assert_frame_equal (
118
140
pd .read_csv (join (self .TEST_DIR , "20200215_county_deaths_test.csv" )),
119
- pd .DataFrame ({"geo_id" : [51093 , 51175 ],
120
- "val" : [round (3.12345678910 , 7 ), 2.1 ],
121
- "se" : [0.15 , 0.22 ],
122
- "sample_size" : [100 , 100 ]})
141
+ pd .DataFrame (
142
+ {
143
+ "geo_id" : [51093 , 51175 ],
144
+ "val" : [round (3.12345678910 , 7 ), 2.1 ],
145
+ "se" : [0.15 , 0.22 ],
146
+ "sample_size" : [100 , 100 ],
147
+ }
148
+ ),
123
149
)
124
150
125
151
def test_export_without_metric (self ):
@@ -211,13 +237,16 @@ def test_export_with_null_removal(self):
211
237
"""Test that `remove_null_samples = True` removes entries with null samples."""
212
238
_clean_directory (self .TEST_DIR )
213
239
214
- df_with_nulls = self .DF .copy ().append ({
215
- "geo_id" : "66666" ,
216
- "timestamp" : datetime (2020 , 6 , 6 ),
217
- "val" : 10 ,
218
- "se" : 0.2 ,
219
- "sample_size" : pd .NA },
220
- ignore_index = True )
240
+ df_with_nulls = self .DF .copy ().append (
241
+ {
242
+ "geo_id" : "66666" ,
243
+ "timestamp" : datetime (2020 , 6 , 6 ),
244
+ "val" : 10 ,
245
+ "se" : 0.2 ,
246
+ "sample_size" : pd .NA ,
247
+ },
248
+ ignore_index = True ,
249
+ )
221
250
222
251
create_export_csv (
223
252
df = df_with_nulls ,
@@ -241,13 +270,16 @@ def test_export_without_null_removal(self):
241
270
"""Test that `remove_null_samples = False` does not remove entries with null samples."""
242
271
_clean_directory (self .TEST_DIR )
243
272
244
- df_with_nulls = self .DF .copy ().append ({
245
- "geo_id" : "66666" ,
246
- "timestamp" : datetime (2020 , 6 , 6 ),
247
- "val" : 10 ,
248
- "se" : 0.2 ,
249
- "sample_size" : pd .NA },
250
- ignore_index = True )
273
+ df_with_nulls = self .DF .copy ().append (
274
+ {
275
+ "geo_id" : "66666" ,
276
+ "timestamp" : datetime (2020 , 6 , 6 ),
277
+ "val" : 10 ,
278
+ "se" : 0.2 ,
279
+ "sample_size" : pd .NA ,
280
+ },
281
+ ignore_index = True ,
282
+ )
251
283
252
284
create_export_csv (
253
285
df = df_with_nulls ,
@@ -267,24 +299,56 @@ def test_export_without_null_removal(self):
267
299
)
268
300
assert pd .read_csv (join (self .TEST_DIR , "20200606_state_test.csv" )).size > 0
269
301
302
+ def test_export_df_without_missingness (self ):
303
+ _clean_directory (self .TEST_DIR )
304
+
305
+ create_export_csv (
306
+ df = self .DF .copy (), export_dir = self .TEST_DIR , geo_res = "county" , sensor = "test"
307
+ )
308
+ df = pd .read_csv (join (self .TEST_DIR , "20200215_county_test.csv" )).astype (
309
+ {"geo_id" : str , "sample_size" : int }
310
+ )
311
+ expected_df = pd .DataFrame (
312
+ {
313
+ "geo_id" : ["51093" , "51175" ],
314
+ "val" : [3.12345678910 , 2.1 ],
315
+ "se" : [0.15 , 0.22 ],
316
+ "sample_size" : [100 , 100 ],
317
+ }
318
+ ).astype ({"geo_id" : str , "sample_size" : int })
319
+ pd .testing .assert_frame_equal (df , expected_df )
320
+
270
321
def test_export_df_with_missingness (self ):
271
322
_clean_directory (self .TEST_DIR )
272
323
273
324
create_export_csv (
274
325
df = self .DF2 .copy (),
275
326
export_dir = self .TEST_DIR ,
276
- geo_res = "state " ,
327
+ geo_res = "county " ,
277
328
sensor = "test" ,
278
- remove_null_samples = False
279
329
)
280
330
assert _non_ignored_files_set (self .TEST_DIR ) == set (
281
331
[
282
- "20200215_state_test .csv" ,
283
- "20200301_state_test .csv" ,
284
- "20200315_state_test .csv" ,
332
+ "20200215_county_test .csv" ,
333
+ "20200301_county_test .csv" ,
334
+ "20200315_county_test .csv" ,
285
335
]
286
336
)
287
- assert pd .read_csv (join (self .TEST_DIR , "20200315_state_test.csv" )).size > 0
337
+ df = pd .read_csv (join (self .TEST_DIR , "20200215_county_test.csv" )).astype (
338
+ {"geo_id" : str , "sample_size" : int }
339
+ )
340
+ expected_df = pd .DataFrame (
341
+ {
342
+ "geo_id" : ["51093" , "51175" ],
343
+ "val" : [3.12345678910 , np .nan ],
344
+ "se" : [0.15 , 0.22 ],
345
+ "sample_size" : [100 , 100 ],
346
+ "missing_val" : [Nans .NOT_MISSING , Nans .OTHER ],
347
+ "missing_se" : [Nans .NOT_MISSING ] * 2 ,
348
+ "missing_sample_size" : [Nans .NOT_MISSING ] * 2 ,
349
+ }
350
+ ).astype ({"geo_id" : str , "sample_size" : int })
351
+ pd .testing .assert_frame_equal (df , expected_df )
288
352
289
353
@mock .patch ("delphi_utils.logger" )
290
354
def test_export_df_with_contradictory_missingness (self , mock_logger ):
@@ -295,7 +359,6 @@ def test_export_df_with_contradictory_missingness(self, mock_logger):
295
359
export_dir = self .TEST_DIR ,
296
360
geo_res = "state" ,
297
361
sensor = "test" ,
298
- remove_null_samples = False ,
299
362
logger = mock_logger
300
363
)
301
364
assert _non_ignored_files_set (self .TEST_DIR ) == set (
0 commit comments