Skip to content

Commit ec697ed

Browse files
committed
Nancodes archiver: remove NaN-filled replacements for deleted files
1 parent e07d9cb commit ec697ed

File tree

2 files changed

+8
-67
lines changed

2 files changed

+8
-67
lines changed

_delphi_utils_python/delphi_utils/archive.py

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -254,26 +254,7 @@ def diff_exports(self) -> Tuple[Files, FileDiffMap, Files]:
254254
new_issues_df.to_csv(diff_file, na_rep="NA")
255255
common_diffs[after_file] = diff_file
256256

257-
export_csv_dtypes = {
258-
"geo_id": str, "val": float, "se": float, "sample_size": float,
259-
"missing_val": int, "missing_se": int, "missing_sample_size": int
260-
}
261-
262-
# Replace deleted files with empty versions, but only if the cached version is not
263-
# already empty
264-
deleted_files_nanfilled = []
265-
for deleted_file in deleted_files:
266-
deleted_df = pd.read_csv(deleted_file, dtype=export_csv_dtypes)
267-
print(
268-
f"Diff deleted {deleted_file}; generating corresponding CSV with deleted rows."
269-
)
270-
deleted_df[["val", "se", "sample_size"]] = np.nan
271-
deleted_df[["missing_val", "missing_se", "missing_sample_size"]] = Nans.DELETED
272-
filename = join(self.export_dir, basename(deleted_file))
273-
deleted_df.to_csv(filename, index=False)
274-
deleted_files_nanfilled.append(filename)
275-
276-
return deleted_files_nanfilled, common_diffs, new_files
257+
return deleted_files, common_diffs, new_files
277258

278259
def archive_exports(self, exported_files: Files) -> Tuple[Files, Files]:
279260
"""
@@ -331,13 +312,12 @@ def run(self):
331312
self.update_cache()
332313

333314
# Diff exports, and make incremental versions
334-
deleted_files, common_diffs, new_files = self.diff_exports()
315+
_, common_diffs, new_files = self.diff_exports()
335316

336317
# Archive changed and new files
337318
to_archive = [f for f, diff in common_diffs.items()
338319
if diff is not None]
339320
to_archive += new_files
340-
to_archive += deleted_files
341321
_, fails = self.archive_exports(to_archive)
342322

343323
# Filter existing exports to exclude those that failed to archive

_delphi_utils_python/tests/test_archive.py

Lines changed: 6 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -135,16 +135,6 @@ def test_diff_and_filter_exports(self, tmp_path):
135135
"missing_sample_size": [Nans.DELETED] + [Nans.NOT_MISSING] * 2,
136136
})
137137

138-
csv2_deleted = pd.DataFrame({
139-
"geo_id": ["1"],
140-
"val": [np.nan],
141-
"se": [np.nan],
142-
"sample_size": [np.nan],
143-
"missing_val": [Nans.DELETED],
144-
"missing_se": [Nans.DELETED],
145-
"missing_sample_size": [Nans.DELETED],
146-
})
147-
148138
arch_diff = ArchiveDiffer(cache_dir, export_dir)
149139

150140
# Test diff_exports
@@ -164,7 +154,7 @@ def test_diff_and_filter_exports(self, tmp_path):
164154
deleted_files, common_diffs, new_files = arch_diff.diff_exports()
165155

166156
# Check return values
167-
assert set(deleted_files) == {join(export_dir, "csv2.csv")}
157+
assert set(deleted_files) == {join(cache_dir, "csv2.csv")}
168158
assert set(common_diffs.keys()) == {
169159
join(export_dir, f) for f in ["csv0.csv", "csv1.csv", "csv4.csv"]}
170160
assert set(new_files) == {join(export_dir, "csv3.csv")}
@@ -175,8 +165,7 @@ def test_diff_and_filter_exports(self, tmp_path):
175165
# Check filesystem for actual files
176166
assert set(listdir(export_dir)) == {
177167
"csv0.csv", "csv1.csv", "csv1.csv.diff",
178-
"csv3.csv", "csv4.csv", "csv4.csv.diff",
179-
"csv2.csv"
168+
"csv3.csv", "csv4.csv", "csv4.csv.diff"
180169
}
181170
assert_frame_equal(
182171
pd.read_csv(join(export_dir, "csv1.csv.diff"), dtype=CSV_DTYPES),
@@ -194,11 +183,8 @@ def test_diff_and_filter_exports(self, tmp_path):
194183

195184
arch_diff.filter_exports(common_diffs)
196185

197-
# Check exports directory just has incremental and deleted changes
198-
assert set(listdir(export_dir)) == {"csv1.csv", "csv2.csv", "csv3.csv", "csv4.csv"}
199-
assert_frame_equal(
200-
pd.read_csv(join(export_dir, "csv2.csv"), dtype=CSV_DTYPES),
201-
csv2_deleted)
186+
# Check exports directory just has incremental changes
187+
assert set(listdir(export_dir)) == {"csv1.csv", "csv3.csv", "csv4.csv"}
202188
assert_frame_equal(
203189
pd.read_csv(join(export_dir, "csv1.csv"), dtype=CSV_DTYPES),
204190
csv1_diff)
@@ -325,7 +311,7 @@ def test_run(self, tmp_path, s3_client):
325311
assert_frame_equal(pd.read_csv(body, dtype=CSV_DTYPES), df)
326312

327313
# Check exports directory just has incremental changes
328-
assert set(listdir(export_dir)) == {"csv1.csv", "csv2.csv", "csv3.csv", "csv4.csv"}
314+
assert set(listdir(export_dir)) == {"csv1.csv", "csv3.csv", "csv4.csv"}
329315
csv1_diff = pd.DataFrame({
330316
"geo_id": ["3", "2", "4"],
331317
"val": [np.nan, 2.1, 4.0],
@@ -338,18 +324,6 @@ def test_run(self, tmp_path, s3_client):
338324
assert_frame_equal(
339325
pd.read_csv(join(export_dir, "csv1.csv"), dtype=CSV_DTYPES),
340326
csv1_diff)
341-
csv2_deleted = pd.DataFrame({
342-
"geo_id": ["1"],
343-
"val": [np.nan],
344-
"se": [np.nan],
345-
"sample_size": [np.nan],
346-
"missing_val": [Nans.DELETED],
347-
"missing_se": [Nans.DELETED],
348-
"missing_sample_size": [Nans.DELETED],
349-
})
350-
assert_frame_equal(
351-
pd.read_csv(join(export_dir, "csv2.csv"), dtype=CSV_DTYPES),
352-
csv2_deleted)
353327

354328

355329
class TestGitArchiveDiffer:
@@ -550,7 +524,7 @@ def test_run(self, tmp_path):
550524
original_branch.checkout()
551525

552526
# Check exports directory just has incremental changes
553-
assert set(listdir(export_dir)) == {"csv1.csv", "csv2.csv", "csv3.csv", "csv4.csv"}
527+
assert set(listdir(export_dir)) == {"csv1.csv", "csv3.csv", "csv4.csv"}
554528
csv1_diff = pd.DataFrame({
555529
"geo_id": ["3", "2", "4"],
556530
"val": [np.nan, 2.1, 4.0],
@@ -563,19 +537,6 @@ def test_run(self, tmp_path):
563537
assert_frame_equal(
564538
pd.read_csv(join(export_dir, "csv1.csv"), dtype=CSV_DTYPES),
565539
csv1_diff)
566-
csv2_deleted = pd.DataFrame({
567-
"geo_id": ["1"],
568-
"val": [np.nan],
569-
"se": [np.nan],
570-
"sample_size": [np.nan],
571-
"missing_val": [Nans.DELETED],
572-
"missing_se": [Nans.DELETED],
573-
"missing_sample_size": [Nans.DELETED],
574-
})
575-
assert_frame_equal(
576-
pd.read_csv(join(export_dir, "csv2.csv"), dtype=CSV_DTYPES),
577-
csv2_deleted)
578-
579540

580541

581542
class TestFromParams:

0 commit comments

Comments
 (0)