Commit 2f9c20a
Merge pull request #1287 from cmu-delphi/release/indicators_v0.1.20_utils_v0.1.14
Release covidcast-indicators 0.1.20
2 parents f3aa36a + 9e7b825 commit 2f9c20a

File tree: 6 files changed (+79, -26 lines)

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.1.19
+current_version = 0.1.20
 commit = True
 message = chore: bump covidcast-indicators to {new_version}
 tag = False
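
These version stamps are the kind maintained by the bump2version tool that reads this config; presumably the release was cut by running "bumpversion patch" in each package root, which bumps 0.1.19 to 0.1.20 and, per commit = True and tag = False, commits the change with the message template above without tagging. The same applies to the delphi_utils config below.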

_delphi_utils_python/.bumpversion.cfg

Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.1.13
+current_version = 0.1.14
 commit = True
 message = chore: bump delphi_utils to {new_version}
 tag = False

_delphi_utils_python/delphi_utils/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -14,4 +14,4 @@
 from .signal import add_prefix
 from .nancodes import Nans
 
-__version__ = "0.1.13"
+__version__ = "0.1.14"
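
A quick runtime sanity check of the bump (my sketch; assumes the updated package is importable):

    # Confirm the bumped version is what the package reports.
    import delphi_utils
    assert delphi_utils.__version__ == "0.1.14"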

_delphi_utils_python/delphi_utils/archive.py

Lines changed: 2 additions & 1 deletion

@@ -105,7 +105,8 @@ def diff_export_csv(
     # Code deleted entries as nans with the deleted missing code
     deleted_df = before_df.loc[deleted_idx, :].copy()
     deleted_df[["val", "se", "sample_size"]] = np.nan
-    deleted_df[["missing_val", "missing_se", "missing_sample_size"]] = Nans.DELETED
+    if "missing_val" in after_df_cmn.columns:
+        deleted_df[["missing_val", "missing_se", "missing_sample_size"]] = Nans.DELETED
 
     return (
         deleted_df,
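
This is the substantive change of the release: diff_export_csv now writes Nans.DELETED codes only when the newer file actually carries missing-value columns, so diffing plain CSVs no longer invents missing_* columns in the output. A minimal standalone sketch of the guard (made-up frames shaped like the new csv7/csv8 fixtures below, not the function's real call path):

    import numpy as np
    import pandas as pd
    from delphi_utils import Nans

    # Neither file tracks missingness, as in the new csv7/csv8 fixtures.
    before_df = pd.DataFrame(
        {"val": [1.0, 2.0], "se": [0.1, 0.2], "sample_size": [10.0, 20.0]},
        index=pd.Index(["1", "2"], name="geo_id"))
    after_df_cmn = before_df.loc[["1"]]  # row "2" was deleted

    deleted_idx = before_df.index.difference(after_df_cmn.index)
    deleted_df = before_df.loc[deleted_idx, :].copy()
    deleted_df[["val", "se", "sample_size"]] = np.nan
    if "missing_val" in after_df_cmn.columns:
        # Skipped in this scenario: writing Nans.DELETED here would add
        # missing_* columns that neither input file carries.
        deleted_df[["missing_val", "missing_se", "missing_sample_size"]] = Nans.DELETED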

_delphi_utils_python/setup.py

Lines changed: 1 addition & 1 deletion

@@ -25,7 +25,7 @@
 
 setup(
     name="delphi_utils",
-    version="0.1.13",
+    version="0.1.14",
     description="Shared Utility Functions for Indicators",
     long_description=long_description,
     long_description_content_type="text/markdown",

_delphi_utils_python/tests/test_archive.py

Lines changed: 73 additions & 21 deletions

@@ -18,11 +18,11 @@
 
 CSV_DTYPES = {
     "geo_id": str, "val": float, "se": float, "sample_size": float,
-    "missing_val": int, "missing_se":int, "missing_sample_size": int
+    "missing_val": int, "missing_se": int, "missing_sample_size": int
 }
 
 CSVS_BEFORE = {
-    # Common
+    # All rows unchanged
     "csv0": pd.DataFrame({
         "geo_id": ["1", "2", "3"],
         "val": [1.000000001, 2.00000002, 3.00000003],

@@ -33,6 +33,7 @@
         "missing_sample_size": [Nans.NOT_MISSING] * 3,
     }),
 
+    # One row deleted and one row added
     "csv1": pd.DataFrame({
         "geo_id": ["1", "2", "3"],
         "val": [1.0, 2.0, 3.0],

@@ -43,7 +44,7 @@
         "missing_sample_size": [Nans.NOT_MISSING] * 3,
     }),
 
-    # Deleted
+    # File deleted
     "csv2": pd.DataFrame({
         "geo_id": ["1"],
         "val": [1.0],

@@ -54,15 +55,15 @@
         "missing_sample_size": [Nans.NOT_MISSING],
     }),
 
-    # Common, but updated with missing columns
+    # All rows common, but missing columns added
     "csv4": pd.DataFrame({
         "geo_id": ["1"],
         "val": [1.0],
         "se": [0.1],
         "sample_size": [10.0]
     }),
 
-    # Common, but missing columns removed
+    # All rows common, but missing columns removed
     "csv5": pd.DataFrame({
         "geo_id": ["1"],
         "val": [1.0],

@@ -72,10 +73,34 @@
         "missing_se": [Nans.NOT_MISSING],
         "missing_sample_size": [Nans.NOT_MISSING],
     }),
+
+    # All rows common, but no missing columns
+    "csv6": pd.DataFrame({
+        "geo_id": ["1"],
+        "val": [1.0],
+        "se": [0.1],
+        "sample_size": [10.0]
+    }),
+
+    # Row deleted and row added, but no missing columns (will not be uploaded)
+    "csv7": pd.DataFrame({
+        "geo_id": ["1", "2"],
+        "val": [1.0, 2.0],
+        "se": [0.1, 0.2],
+        "sample_size": [10.0, 20.0]
+    }),
+
+    # Row deleted and row added, but no missing columns
+    "csv8": pd.DataFrame({
+        "geo_id": ["1", "2"],
+        "val": [1.0, 2.0],
+        "se": [0.1, 0.2],
+        "sample_size": [10.0, 20.0]
+    }),
 }
 
 CSVS_AFTER = {
-    # Common
+    # All rows unchanged
     "csv0": pd.DataFrame({
         "geo_id": ["1", "2", "3"],
         "val": [1.0, 2.0, 3.0],

@@ -86,6 +111,7 @@
         "missing_sample_size": [Nans.NOT_MISSING] * 3,
     }),
 
+    # One row deleted and one row added
     "csv1": pd.DataFrame({
         "geo_id": ["1", "2", "4"],
         "val": [1.0, 2.1, 4.0],

@@ -96,7 +122,7 @@
         "missing_sample_size": [Nans.NOT_MISSING] * 3,
     }),
 
-    # Added
+    # File added
     "csv3": pd.DataFrame({
         "geo_id": ["2"],
         "val": [2.0000002],

@@ -107,7 +133,7 @@
         "missing_sample_size": [Nans.NOT_MISSING],
     }),
 
-    # Common, but updated with missing columns
+    # All rows common, but missing columns added
     "csv4": pd.DataFrame({
         "geo_id": ["1"],
         "val": [1.0],

@@ -118,13 +144,37 @@
         "missing_sample_size": [Nans.NOT_MISSING],
     }),
 
-    # Common, but missing columns removed
+    # All rows common, but missing columns removed
     "csv5": pd.DataFrame({
         "geo_id": ["1"],
         "val": [1.0],
         "se": [0.1],
         "sample_size": [10.0]
     }),
+
+    # All rows common, but no missing columns
+    "csv6": pd.DataFrame({
+        "geo_id": ["1"],
+        "val": [1.0],
+        "se": [0.1],
+        "sample_size": [10.0]
+    }),
+
+    # Row deleted and row added, but no missing columns (will not be uploaded)
+    "csv7": pd.DataFrame({
+        "geo_id": ["1"],
+        "val": [1.0],
+        "se": [0.1],
+        "sample_size": [10.0]
+    }),
+
+    # Row deleted and row added, but no missing columns
+    "csv8": pd.DataFrame({
+        "geo_id": ["1", "3"],
+        "val": [1.0, 3.0],
+        "se": [0.1, 0.3],
+        "sample_size": [10.0, 30.0]
+    }),
 }
 
 class TestArchiveDiffer:
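
Hand-working the csv8 case for intuition (my own illustration, not an assertion copied from the tests): row 2 exists only before, row 3 only after, and row 1 is unchanged, so csv8.csv.diff should contain the deleted row coded as NaNs plus the added row, with no missing_* columns thanks to the new guard in archive.py:

    # Hypothetical, hand-derived shape of csv8.csv.diff.
    csv8_diff = pd.DataFrame({
        "geo_id": ["2", "3"],
        "val": [np.nan, 3.0],
        "se": [np.nan, 0.3],
        "sample_size": [np.nan, 30.0],
    })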
@@ -175,17 +225,22 @@ def test_diff_and_filter_exports(self, tmp_path):
         # Check return values
         assert set(deleted_files) == {join(cache_dir, "csv2.csv")}
         assert set(common_diffs.keys()) == {
-            join(export_dir, f) for f in ["csv0.csv", "csv1.csv", "csv4.csv", "csv5.csv"]}
+            join(export_dir, f) for f in ["csv0.csv", "csv1.csv", "csv4.csv", "csv5.csv", "csv6.csv", "csv7.csv", "csv8.csv"]}
         assert set(new_files) == {join(export_dir, "csv3.csv")}
         assert common_diffs[join(export_dir, "csv0.csv")] is None
         assert common_diffs[join(export_dir, "csv1.csv")] == join(
             export_dir, "csv1.csv.diff")
 
         # Check filesystem for actual files
         assert set(listdir(export_dir)) == {
-            "csv0.csv", "csv1.csv", "csv1.csv.diff",
-            "csv3.csv", "csv4.csv", "csv4.csv.diff",
-            "csv5.csv", "csv5.csv.diff"
+            "csv0.csv",
+            "csv1.csv", "csv1.csv.diff",
+            "csv3.csv",
+            "csv4.csv", "csv4.csv.diff",
+            "csv5.csv", "csv5.csv.diff",
+            "csv6.csv",
+            "csv7.csv", "csv7.csv.diff",
+            "csv8.csv", "csv8.csv.diff"
         }
         assert_frame_equal(
             pd.read_csv(join(export_dir, "csv1.csv.diff"), dtype=CSV_DTYPES),

@@ -204,7 +259,7 @@ def test_diff_and_filter_exports(self, tmp_path):
         arch_diff.filter_exports(common_diffs)
 
         # Check exports directory just has incremental changes
-        assert set(listdir(export_dir)) == {"csv1.csv", "csv3.csv", "csv4.csv", "csv5.csv"}
+        assert set(listdir(export_dir)) == {"csv1.csv", "csv3.csv", "csv4.csv", "csv5.csv", "csv7.csv", "csv8.csv"}
         assert_frame_equal(
             pd.read_csv(join(export_dir, "csv1.csv"), dtype=CSV_DTYPES),
             csv1_diff)
@@ -325,13 +380,11 @@ def test_run(self, tmp_path, s3_client):
 
         # Check that the buckets now contain the exported files.
         for csv_name, df in CSVS_AFTER.items():
-            body = s3_client.get_object(
-                Bucket=self.bucket_name,
-                Key=f"{self.indicator_prefix}/{csv_name}.csv")["Body"]
+            body = s3_client.get_object(Bucket=self.bucket_name, Key=f"{self.indicator_prefix}/{csv_name}.csv")["Body"]
             assert_frame_equal(pd.read_csv(body, dtype=CSV_DTYPES), df)
 
         # Check exports directory just has incremental changes
-        assert set(listdir(export_dir)) == {"csv1.csv", "csv3.csv", "csv4.csv", "csv5.csv"}
+        assert set(listdir(export_dir)) == {"csv1.csv", "csv3.csv", "csv4.csv", "csv5.csv", "csv7.csv", "csv8.csv"}
         csv1_diff = pd.DataFrame({
             "geo_id": ["3", "2", "4"],
             "val": [np.nan, 2.1, 4.0],

@@ -539,12 +592,11 @@ def test_run(self, tmp_path):
         arch_diff.get_branch(branch_name).checkout()
         for csv_name, df in CSVS_AFTER.items():
             assert_frame_equal(
-                pd.read_csv(
-                    join(cache_dir, f"{csv_name}.csv"), dtype=CSV_DTYPES), df)
+                pd.read_csv(join(cache_dir, f"{csv_name}.csv"), dtype=CSV_DTYPES), df)
         original_branch.checkout()
 
         # Check exports directory just has incremental changes
-        assert set(listdir(export_dir)) == {"csv1.csv", "csv3.csv", "csv4.csv", "csv5.csv"}
+        assert set(listdir(export_dir)) == {"csv1.csv", "csv3.csv", "csv4.csv", "csv5.csv", "csv7.csv", "csv8.csv"}
         csv1_diff = pd.DataFrame({
             "geo_id": ["3", "2", "4"],
             "val": [np.nan, 2.1, 4.0],
