Skip to content

Commit b56a961

Browse files
authored
Merge branch 'main' into support-experimental-wave-12
2 parents 7421d11 + b5825a8 commit b56a961

File tree

56 files changed

+644
-341
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+644
-341
lines changed

.github/workflows/r-ci.yml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,15 @@ jobs:
4848
${{ runner.os }}-r-facebook-survey-
4949
- name: Install R dependencies
5050
run: |
51-
if ( packageVersion("readr") != "1.4.0" ) {
51+
if ( !require("readr") || packageVersion("readr") != "1.4.0" ) {
5252
install.packages("devtools")
5353
devtools::install_version("readr", version = "1.4.0")
5454
}
55-
install.packages("remotes")
56-
remotes::update_packages(c("rcmdcheck", "mockr"), upgrade="always")
55+
56+
if ( !require("remotes") ) {
57+
install.packages("remotes")
58+
}
59+
remotes::update_packages(c("rcmdcheck", "mockr", "remotes"), upgrade="always")
5760
dependency_list <- remotes::dev_package_deps(dependencies=TRUE)
5861
remotes::update_packages(dependency_list$package[dependency_list$package != "readr"], upgrade="always")
5962
shell: Rscript {0}

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
params.json
55

66
# Do not commit output files
7-
receiving/*.csv
7+
**/receiving/*.csv
88

99
# Do not commit hidden macOS files
1010
.DS_Store

_delphi_utils_python/delphi_utils/archive.py

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,11 @@
4040
from git import Repo
4141
from git.refs.head import Head
4242
import pandas as pd
43+
import numpy as np
4344

4445
from .utils import read_params
4546
from .logger import get_structured_logger
47+
from .nancodes import Nans
4648

4749
Files = List[str]
4850
FileDiffMap = Dict[str, Optional[str]]
@@ -73,8 +75,10 @@ def diff_export_csv(
7375
changed_df is the pd.DataFrame of common rows from after_csv with changed values.
7476
added_df is the pd.DataFrame of added rows from after_csv.
7577
"""
76-
export_csv_dtypes = {"geo_id": str, "val": float,
77-
"se": float, "sample_size": float}
78+
export_csv_dtypes = {
79+
"geo_id": str, "val": float, "se": float, "sample_size": float,
80+
"missing_val": int, "missing_se": int, "missing_sample_size": int
81+
}
7882

7983
before_df = pd.read_csv(before_csv, dtype=export_csv_dtypes)
8084
before_df.set_index("geo_id", inplace=True)
@@ -89,12 +93,22 @@ def diff_export_csv(
8993
before_df_cmn = before_df.reindex(common_idx)
9094
after_df_cmn = after_df.reindex(common_idx)
9195

92-
# Exact comparisons, treating NA == NA as True
93-
same_mask = before_df_cmn == after_df_cmn
94-
same_mask |= pd.isna(before_df_cmn) & pd.isna(after_df_cmn)
96+
# If CSVs have different columns (no missingness), mark all values as new
97+
if ("missing_val" in before_df_cmn.columns) ^ ("missing_val" in after_df_cmn.columns):
98+
same_mask = after_df_cmn.copy()
99+
same_mask.loc[:] = False
100+
else:
101+
# Exact comparisons, treating NA == NA as True
102+
same_mask = before_df_cmn == after_df_cmn
103+
same_mask |= pd.isna(before_df_cmn) & pd.isna(after_df_cmn)
104+
105+
# Code deleted entries as nans with the deleted missing code
106+
deleted_df = before_df.loc[deleted_idx, :].copy()
107+
deleted_df[["val", "se", "sample_size"]] = np.nan
108+
deleted_df[["missing_val", "missing_se", "missing_sample_size"]] = Nans.DELETED
95109

96110
return (
97-
before_df.loc[deleted_idx, :],
111+
deleted_df,
98112
after_df_cmn.loc[~(same_mask.all(axis=1)), :],
99113
after_df.loc[added_idx, :])
100114

@@ -227,11 +241,11 @@ def diff_exports(self) -> Tuple[Files, FileDiffMap, Files]:
227241

228242
deleted_df, changed_df, added_df = diff_export_csv(
229243
before_file, after_file)
230-
new_issues_df = pd.concat([changed_df, added_df], axis=0)
244+
new_issues_df = pd.concat([deleted_df, changed_df, added_df], axis=0)
231245

232246
if len(deleted_df) > 0:
233247
print(
234-
f"Warning, diff has deleted indices in {after_file} that will be ignored")
248+
f"Diff has deleted indices in {after_file} that have been coded as nans.")
235249

236250
# Write the diffs to diff_file, if applicable
237251
if len(new_issues_df) > 0:
@@ -414,6 +428,9 @@ def archive_exports(self, # pylint: disable=arguments-differ
414428
archive_success.append(exported_file)
415429
except FileNotFoundError:
416430
archive_fail.append(exported_file)
431+
except shutil.SameFileError:
432+
# no need to copy if the cached file is the same
433+
archive_success.append(exported_file)
417434

418435
self._exports_archived = True
419436

_delphi_utils_python/delphi_utils/export.py

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,33 @@
33
from datetime import datetime
44
from os.path import join
55
from typing import Optional
6+
import logging
67

8+
from epiweeks import Week
79
import numpy as np
810
import pandas as pd
911

12+
from .nancodes import Nans
13+
14+
def filter_contradicting_missing_codes(df, sensor, metric, date, logger=None):
    """Drop rows whose missingness code contradicts the value it describes, and log.

    For each of "val", "se" and "sample_size", a row is contradictory when
    either of the following holds:
      * the value is NaN but its "missing_<column>" code equals Nans.NOT_MISSING
      * the value is present but its code is anything other than Nans.NOT_MISSING

    Parameters
    ----------
    df: pd.DataFrame
        Frame holding "val", "se", "sample_size" and the matching "missing_val",
        "missing_se", "missing_sample_size" columns.
    sensor, metric, date:
        Only used to build the log message "<sensor>_<metric>_<YYYY-MM-DD>";
        `date` must provide `strftime`, and is only formatted when a logger is given
        and something is actually filtered.
    logger: Optional[logging.Logger]
        If given, one info message is emitted for every column in which
        contradictory rows were found.

    Returns
    -------
    pd.DataFrame
        `df` without the contradictory rows (the input frame is not modified in place).
    """
    for column in ("val", "se", "sample_size"):
        # The mask is rebuilt from the *current* frame on every pass, so it always
        # shares the frame's index and never depends on pandas re-aligning a stale
        # mask (which fails on a non-unique index).
        value_is_nan = df[column].isna()
        coded_not_missing = df["missing_" + column].eq(Nans.NOT_MISSING)
        # XNOR: the row is contradictory when both flags agree (both true or both false).
        contradictory = ~(value_is_nan ^ coded_not_missing)
        if not contradictory.any():
            continue
        if logger is not None:
            logger.info(
                "Filtering contradictory missing code in " +
                "{0}_{1}_{2}.".format(sensor, metric, date.strftime(format="%Y-%m-%d"))
            )
        df = df.loc[~contradictory]
    return df
32+
1033
def create_export_csv(
1134
df: pd.DataFrame,
1235
export_dir: str,
@@ -16,7 +39,9 @@ def create_export_csv(
1639
start_date: Optional[datetime] = None,
1740
end_date: Optional[datetime] = None,
1841
remove_null_samples: Optional[bool] = False,
19-
write_empty_days: Optional[bool] = False
42+
write_empty_days: Optional[bool] = False,
43+
logger: Optional[logging.Logger] = None,
44+
weekly_dates = False,
2045
):
2146
"""Export data in the format expected by the Delphi API.
2247
@@ -43,6 +68,8 @@ def create_export_csv(
4368
write_empty_days: Optional[bool]
4469
If true, every day in between start_date and end_date will have a CSV file written
4570
even if there is no data for the day. If false, only the days present are written.
71+
logger: Optional[logging.Logger]
72+
Pass a logger object here to log information about contradictory missing codes.
4673
4774
Returns
4875
---------
@@ -65,12 +92,30 @@ def create_export_csv(
6592
dates = pd.date_range(start_date, end_date)
6693

6794
for date in dates:
95+
if weekly_dates:
96+
t = Week.fromdate(pd.to_datetime(str(date)))
97+
date_str = "weekly_" + str(t.year) + str(t.week).zfill(2)
98+
else:
99+
date_str = date.strftime('%Y%m%d')
68100
if metric is None:
69-
export_filename = f"{date.strftime('%Y%m%d')}_{geo_res}_{sensor}.csv"
101+
export_filename = f"{date_str}_{geo_res}_{sensor}.csv"
70102
else:
71-
export_filename = f"{date.strftime('%Y%m%d')}_{geo_res}_{metric}_{sensor}.csv"
103+
export_filename = f"{date_str}_{geo_res}_{metric}_{sensor}.csv"
72104
export_file = join(export_dir, export_filename)
73-
export_df = df[df["timestamp"] == date][["geo_id", "val", "se", "sample_size",]]
105+
expected_columns = [
106+
"geo_id",
107+
"val",
108+
"se",
109+
"sample_size",
110+
"missing_val",
111+
"missing_se",
112+
"missing_sample_size"
113+
]
114+
export_df = df[df["timestamp"] == date].filter(items=expected_columns)
115+
if "missing_val" in export_df.columns:
116+
export_df = filter_contradicting_missing_codes(
117+
export_df, sensor, metric, date, logger=logger
118+
)
74119
if remove_null_samples:
75120
export_df = export_df[export_df["sample_size"].notnull()]
76121
export_df = export_df.round({"val": 7, "se": 7})

_delphi_utils_python/setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
required = [
88
"boto3",
99
"covidcast",
10+
"epiweeks",
1011
"freezegun",
1112
"gitpython",
1213
"mock",
@@ -35,7 +36,7 @@
3536
classifiers=[
3637
"Development Status :: 5 - Production/Stable",
3738
"Intended Audience :: Developers",
38-
"Programming Language :: Python :: 3.7",
39+
"Programming Language :: Python :: 3.8",
3940
],
4041
packages=find_packages(),
4142
package_data={'': ['data/*.csv']}

0 commit comments

Comments
 (0)