Skip to content

Commit 7cb9b8c

Browse files
committed
Nancodes archiver/export: explicit tests
1 parent a46f866 commit 7cb9b8c

File tree

2 files changed

+119
-36
lines changed

2 files changed

+119
-36
lines changed

_delphi_utils_python/tests/test_archive.py

Lines changed: 25 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,17 @@
6161
"se": [0.1],
6262
"sample_size": [10.0]
6363
}),
64+
65+
# Common, but missing columns removed
66+
"csv5": pd.DataFrame({
67+
"geo_id": ["1"],
68+
"val": [1.0],
69+
"se": [0.1],
70+
"sample_size": [10.0],
71+
"missing_val": [Nans.NOT_MISSING],
72+
"missing_se": [Nans.NOT_MISSING],
73+
"missing_sample_size": [Nans.NOT_MISSING],
74+
}),
6475
}
6576

6677
CSVS_AFTER = {
@@ -106,6 +117,14 @@
106117
"missing_se": [Nans.NOT_MISSING],
107118
"missing_sample_size": [Nans.NOT_MISSING],
108119
}),
120+
121+
# Common, but missing columns removed
122+
"csv5": pd.DataFrame({
123+
"geo_id": ["1"],
124+
"val": [1.0],
125+
"se": [0.1],
126+
"sample_size": [10.0]
127+
}),
109128
}
110129

111130
class TestArchiveDiffer:
@@ -156,7 +175,7 @@ def test_diff_and_filter_exports(self, tmp_path):
156175
# Check return values
157176
assert set(deleted_files) == {join(cache_dir, "csv2.csv")}
158177
assert set(common_diffs.keys()) == {
159-
join(export_dir, f) for f in ["csv0.csv", "csv1.csv", "csv4.csv"]}
178+
join(export_dir, f) for f in ["csv0.csv", "csv1.csv", "csv4.csv", "csv5.csv"]}
160179
assert set(new_files) == {join(export_dir, "csv3.csv")}
161180
assert common_diffs[join(export_dir, "csv0.csv")] is None
162181
assert common_diffs[join(export_dir, "csv1.csv")] == join(
@@ -165,7 +184,8 @@ def test_diff_and_filter_exports(self, tmp_path):
165184
# Check filesystem for actual files
166185
assert set(listdir(export_dir)) == {
167186
"csv0.csv", "csv1.csv", "csv1.csv.diff",
168-
"csv3.csv", "csv4.csv", "csv4.csv.diff"
187+
"csv3.csv", "csv4.csv", "csv4.csv.diff",
188+
"csv5.csv", "csv5.csv.diff"
169189
}
170190
assert_frame_equal(
171191
pd.read_csv(join(export_dir, "csv1.csv.diff"), dtype=CSV_DTYPES),
@@ -184,7 +204,7 @@ def test_diff_and_filter_exports(self, tmp_path):
184204
arch_diff.filter_exports(common_diffs)
185205

186206
# Check exports directory just has incremental changes
187-
assert set(listdir(export_dir)) == {"csv1.csv", "csv3.csv", "csv4.csv"}
207+
assert set(listdir(export_dir)) == {"csv1.csv", "csv3.csv", "csv4.csv", "csv5.csv"}
188208
assert_frame_equal(
189209
pd.read_csv(join(export_dir, "csv1.csv"), dtype=CSV_DTYPES),
190210
csv1_diff)
@@ -311,7 +331,7 @@ def test_run(self, tmp_path, s3_client):
311331
assert_frame_equal(pd.read_csv(body, dtype=CSV_DTYPES), df)
312332

313333
# Check exports directory just has incremental changes
314-
assert set(listdir(export_dir)) == {"csv1.csv", "csv3.csv", "csv4.csv"}
334+
assert set(listdir(export_dir)) == {"csv1.csv", "csv3.csv", "csv4.csv", "csv5.csv"}
315335
csv1_diff = pd.DataFrame({
316336
"geo_id": ["3", "2", "4"],
317337
"val": [np.nan, 2.1, 4.0],
@@ -524,7 +544,7 @@ def test_run(self, tmp_path):
524544
original_branch.checkout()
525545

526546
# Check exports directory just has incremental changes
527-
assert set(listdir(export_dir)) == {"csv1.csv", "csv3.csv", "csv4.csv"}
547+
assert set(listdir(export_dir)) == {"csv1.csv", "csv3.csv", "csv4.csv", "csv5.csv"}
528548
csv1_diff = pd.DataFrame({
529549
"geo_id": ["3", "2", "4"],
530550
"val": [np.nan, 2.1, 4.0],

_delphi_utils_python/tests/test_export.py

Lines changed: 94 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99

1010
from delphi_utils import create_export_csv, Nans
1111

12+
1213
def _clean_directory(directory):
1314
"""Clean files out of a directory."""
1415
for fname in listdir(directory):
@@ -29,6 +30,7 @@ def _non_ignored_files_set(directory):
2930

3031
class TestExport:
3132
"""Tests for exporting CSVs."""
33+
3234
# List of times for data points.
3335
TIMES = [
3436
datetime.strptime(x, "%Y-%m-%d")
@@ -54,9 +56,19 @@ class TestExport:
5456
"val": [3.12345678910, np.nan, 2.2, 2.6],
5557
"se": [0.15, 0.22, np.nan, 0.34],
5658
"sample_size": [100, 100, 101, None],
57-
"missing_val": [Nans.NOT_MISSING, Nans.OTHER, Nans.NOT_MISSING, Nans.NOT_MISSING],
58-
"missing_se": [Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.OTHER, Nans.NOT_MISSING],
59-
"missing_sample_size": [Nans.NOT_MISSING] * 3 + [Nans.OTHER]
59+
"missing_val": [
60+
Nans.NOT_MISSING,
61+
Nans.OTHER,
62+
Nans.NOT_MISSING,
63+
Nans.NOT_MISSING,
64+
],
65+
"missing_se": [
66+
Nans.NOT_MISSING,
67+
Nans.NOT_MISSING,
68+
Nans.OTHER,
69+
Nans.NOT_MISSING,
70+
],
71+
"missing_sample_size": [Nans.NOT_MISSING] * 3 + [Nans.OTHER],
6072
}
6173
)
6274

@@ -68,9 +80,19 @@ class TestExport:
6880
"val": [np.nan, np.nan, 2.2, 2.6],
6981
"se": [0.15, 0.22, np.nan, 0.34],
7082
"sample_size": [100, 100, 101, None],
71-
"missing_val": [Nans.NOT_MISSING, Nans.OTHER, Nans.NOT_MISSING, Nans.NOT_MISSING],
72-
"missing_se": [Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.OTHER, Nans.NOT_MISSING],
73-
"missing_sample_size": [Nans.NOT_MISSING] * 3 + [Nans.OTHER]
83+
"missing_val": [
84+
Nans.NOT_MISSING,
85+
Nans.OTHER,
86+
Nans.NOT_MISSING,
87+
Nans.NOT_MISSING,
88+
],
89+
"missing_se": [
90+
Nans.NOT_MISSING,
91+
Nans.NOT_MISSING,
92+
Nans.OTHER,
93+
Nans.NOT_MISSING,
94+
],
95+
"missing_sample_size": [Nans.NOT_MISSING] * 3 + [Nans.OTHER],
7496
}
7597
)
7698

@@ -116,10 +138,14 @@ def test_export_rounding(self):
116138
)
117139
pd.testing.assert_frame_equal(
118140
pd.read_csv(join(self.TEST_DIR, "20200215_county_deaths_test.csv")),
119-
pd.DataFrame({"geo_id": [51093, 51175],
120-
"val": [round(3.12345678910, 7), 2.1],
121-
"se": [0.15, 0.22],
122-
"sample_size": [100, 100]})
141+
pd.DataFrame(
142+
{
143+
"geo_id": [51093, 51175],
144+
"val": [round(3.12345678910, 7), 2.1],
145+
"se": [0.15, 0.22],
146+
"sample_size": [100, 100],
147+
}
148+
),
123149
)
124150

125151
def test_export_without_metric(self):
@@ -211,13 +237,16 @@ def test_export_with_null_removal(self):
211237
"""Test that `remove_null_samples = True` removes entries with null samples."""
212238
_clean_directory(self.TEST_DIR)
213239

214-
df_with_nulls = self.DF.copy().append({
215-
"geo_id": "66666",
216-
"timestamp": datetime(2020, 6, 6),
217-
"val": 10,
218-
"se": 0.2,
219-
"sample_size": pd.NA},
220-
ignore_index=True)
240+
df_with_nulls = self.DF.copy().append(
241+
{
242+
"geo_id": "66666",
243+
"timestamp": datetime(2020, 6, 6),
244+
"val": 10,
245+
"se": 0.2,
246+
"sample_size": pd.NA,
247+
},
248+
ignore_index=True,
249+
)
221250

222251
create_export_csv(
223252
df=df_with_nulls,
@@ -241,13 +270,16 @@ def test_export_without_null_removal(self):
241270
"""Test that `remove_null_samples = False` does not remove entries with null samples."""
242271
_clean_directory(self.TEST_DIR)
243272

244-
df_with_nulls = self.DF.copy().append({
245-
"geo_id": "66666",
246-
"timestamp": datetime(2020, 6, 6),
247-
"val": 10,
248-
"se": 0.2,
249-
"sample_size": pd.NA},
250-
ignore_index=True)
273+
df_with_nulls = self.DF.copy().append(
274+
{
275+
"geo_id": "66666",
276+
"timestamp": datetime(2020, 6, 6),
277+
"val": 10,
278+
"se": 0.2,
279+
"sample_size": pd.NA,
280+
},
281+
ignore_index=True,
282+
)
251283

252284
create_export_csv(
253285
df=df_with_nulls,
@@ -267,24 +299,56 @@ def test_export_without_null_removal(self):
267299
)
268300
assert pd.read_csv(join(self.TEST_DIR, "20200606_state_test.csv")).size > 0
269301

302+
def test_export_df_without_missingness(self):
303+
_clean_directory(self.TEST_DIR)
304+
305+
create_export_csv(
306+
df=self.DF.copy(), export_dir=self.TEST_DIR, geo_res="county", sensor="test"
307+
)
308+
df = pd.read_csv(join(self.TEST_DIR, "20200215_county_test.csv")).astype(
309+
{"geo_id": str, "sample_size": int}
310+
)
311+
expected_df = pd.DataFrame(
312+
{
313+
"geo_id": ["51093", "51175"],
314+
"val": [3.12345678910, 2.1],
315+
"se": [0.15, 0.22],
316+
"sample_size": [100, 100],
317+
}
318+
).astype({"geo_id": str, "sample_size": int})
319+
pd.testing.assert_frame_equal(df, expected_df)
320+
270321
def test_export_df_with_missingness(self):
271322
_clean_directory(self.TEST_DIR)
272323

273324
create_export_csv(
274325
df=self.DF2.copy(),
275326
export_dir=self.TEST_DIR,
276-
geo_res="state",
327+
geo_res="county",
277328
sensor="test",
278-
remove_null_samples=False
279329
)
280330
assert _non_ignored_files_set(self.TEST_DIR) == set(
281331
[
282-
"20200215_state_test.csv",
283-
"20200301_state_test.csv",
284-
"20200315_state_test.csv",
332+
"20200215_county_test.csv",
333+
"20200301_county_test.csv",
334+
"20200315_county_test.csv",
285335
]
286336
)
287-
assert pd.read_csv(join(self.TEST_DIR, "20200315_state_test.csv")).size > 0
337+
df = pd.read_csv(join(self.TEST_DIR, "20200215_county_test.csv")).astype(
338+
{"geo_id": str, "sample_size": int}
339+
)
340+
expected_df = pd.DataFrame(
341+
{
342+
"geo_id": ["51093", "51175"],
343+
"val": [3.12345678910, np.nan],
344+
"se": [0.15, 0.22],
345+
"sample_size": [100, 100],
346+
"missing_val": [Nans.NOT_MISSING, Nans.OTHER],
347+
"missing_se": [Nans.NOT_MISSING] * 2,
348+
"missing_sample_size": [Nans.NOT_MISSING] * 2,
349+
}
350+
).astype({"geo_id": str, "sample_size": int})
351+
pd.testing.assert_frame_equal(df, expected_df)
288352

289353
@mock.patch("delphi_utils.logger")
290354
def test_export_df_with_contradictory_missingness(self, mock_logger):
@@ -295,7 +359,6 @@ def test_export_df_with_contradictory_missingness(self, mock_logger):
295359
export_dir=self.TEST_DIR,
296360
geo_res="state",
297361
sensor="test",
298-
remove_null_samples=False,
299362
logger=mock_logger
300363
)
301364
assert _non_ignored_files_set(self.TEST_DIR) == set(

0 commit comments

Comments
 (0)