Commit 0c61ac4

Merge pull request #1617 from cmu-delphi/release/indicators_v0.3.13_utils_v0.3.4
Release covidcast-indicators 0.3.13
2 parents: 2a8deb2 + 44e2957

14 files changed: +100 −63 lines

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.12
+current_version = 0.3.13
 commit = True
 message = chore: bump covidcast-indicators to {new_version}
 tag = False

_delphi_utils_python/.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.3
+current_version = 0.3.4
 commit = True
 message = chore: bump delphi_utils to {new_version}
 tag = False

_delphi_utils_python/delphi_utils/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -15,4 +15,4 @@
 from .nancodes import Nans
 from .weekday import Weekday
 
-__version__ = "0.3.3"
+__version__ = "0.3.4"

_delphi_utils_python/delphi_utils/validator/dynamic.py

Lines changed: 42 additions & 18 deletions
@@ -273,8 +273,8 @@ def create_dfs(self, geo_sig_df, api_df_or_error, checking_date, geo_type, signa
             - report: ValidationReport; report where results are added
 
         Returns:
-            - False if recent_df is empty, else (recent_df, reference_api_df)
-              (after reference_api_df has been padded if necessary)
+            - False if recent_df is empty after padding, else (recent_df, reference_api_df)
+              (reference_api_df will be padded if necessary)
         """
         # recent_lookbehind: start from the check date and working backward in time,
         # how many days at a time do we want to check for anomalies?
@@ -320,47 +320,71 @@ def create_dfs(self, geo_sig_df, api_df_or_error, checking_date, geo_type, signa
         reference_api_df = api_df_or_error.query(
             "time_value >= @reference_start_date & time_value <= @reference_end_date")
 
-        report.increment_total_checks()
+        pre_pad_empty_flag = reference_api_df.empty
+        reference_api_df = self.pad_reference_api_df(
+            reference_api_df, geo_sig_df, reference_start_date, reference_end_date)
 
+        report.increment_total_checks()
         if reference_api_df.empty:
-            report.add_raised_error(
-                ValidationFailure("empty_reference_data",
+            report.add_raised_error(ValidationFailure("empty_reference_data",
                                   checking_date,
                                   geo_type,
                                   signal_type,
                                   "reference data is empty; comparative checks could not "
                                   "be performed"))
             return False
-
-        reference_api_df = self.pad_reference_api_df(
-            reference_api_df, geo_sig_df, reference_end_date)
+        if pre_pad_empty_flag:
+            report.add_raised_warning(ValidationFailure("empty_reference_data",
+                                      checking_date,
+                                      geo_type,
+                                      signal_type,
+                                      "pre-padding reference data is empty and indicates data "
+                                      "missing from the API; please verify that this is expected"))
 
         return (geo_sig_df, reference_api_df)
 
-    def pad_reference_api_df(self, reference_api_df, geo_sig_df, reference_end_date):
+    # `reference_start_date` is used in the call to `geo_sig_df.query()`
+    # below but pylint doesn't recognize that.
+    # pylint: disable=unused-argument
+    def pad_reference_api_df(self, reference_api_df, geo_sig_df,
+                             reference_start_date, reference_end_date):
         """Check if API data is missing, and supplement from test data.
 
         Arguments:
             - reference_api_df: API data within lookbehind range
             - geo_sig_df: Test data
-            - reference_end_date: Supposed end date of reference data
+            - reference_start_date: Desired start date of reference data
+            - reference_end_date: Desired end date of reference data
 
         Returns:
             - reference_api_df: Supplemented version of original
         """
+        # Value is `NaT` (not a time) if reference_api_df is empty.
        reference_api_df_max_date = reference_api_df.time_value.max().date()
-        if reference_api_df_max_date < reference_end_date:
-            # Querying geo_sig_df, only taking relevant rows
+        if reference_api_df.empty:
+            geo_sig_df_supplement = geo_sig_df.query(
+                'time_value <= @reference_end_date & time_value >= \
+                @reference_start_date')[[
+                "geo_id", "val", "se", "sample_size", "time_value"]]
+        elif reference_api_df_max_date < reference_end_date:
+            # If actual end date `reference_api_df_max_date` is not as recent as
+            # the desired end date `reference_end_date`, add rows from recently
+            # generated data, in `geo_sig_df`, to the reference data.
             geo_sig_df_supplement = geo_sig_df.query(
                 'time_value <= @reference_end_date & time_value > \
                 @reference_api_df_max_date')[[
                 "geo_id", "val", "se", "sample_size", "time_value"]]
-            # Matching time_value format
-            geo_sig_df_supplement["time_value"] = \
-                pd.to_datetime(geo_sig_df_supplement["time_value"],
-                               format = "%Y-%m-%d %H:%M:%S")
-            reference_api_df = pd.concat(
-                [reference_api_df, geo_sig_df_supplement])
+        else:
+            return reference_api_df
+
+        # Final processing after supplementing reference_api_df
+        # Matching time_value format
+        geo_sig_df_supplement["time_value"] = \
+            pd.to_datetime(geo_sig_df_supplement["time_value"],
+                           format = "%Y-%m-%d %H:%M:%S")
+        reference_api_df = pd.concat(
            [reference_api_df, geo_sig_df_supplement])
 
         return reference_api_df
 
     def check_max_date_vs_reference(self, df_to_test, df_to_reference, checking_date,
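
The upshot of this change: padding now happens before the empty check, so a reference window that is entirely missing from the API is back-filled from the test data and the comparative checks still run, raising a warning instead of a hard error. A minimal sketch of the new pre-pad flow, using hypothetical toy frames rather than the validator's real inputs:

    import pandas as pd

    # Toy stand-ins; column names follow the diff above.
    api_df = pd.DataFrame(columns=["geo_id", "val", "se", "sample_size", "time_value"])
    test_df = pd.DataFrame({
        "geo_id": ["pa", "pa"],
        "val": [1.0, 2.0],
        "se": [0.1, 0.1],
        "sample_size": [10, 10],
        "time_value": pd.to_datetime(["2021-01-05", "2021-01-06"]),
    })
    start, end = pd.Timestamp("2021-01-05"), pd.Timestamp("2021-01-06")

    pre_pad_empty = api_df.empty  # remembered so the warning can still be raised
    if api_df.empty:
        # Entirely missing window: pad with the full reference range of test data.
        supplement = test_df.query("time_value >= @start & time_value <= @end")
        api_df = pd.concat([api_df, supplement])

    assert pre_pad_empty and not api_df.empty  # checks proceed; warning is emitted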

_delphi_utils_python/setup.py

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 
 setup(
     name="delphi_utils",
-    version="0.3.3",
+    version="0.3.4",
     description="Shared Utility Functions for Indicators",
     long_description=long_description,
     long_description_content_type="text/markdown",

_delphi_utils_python/tests/validator/test_dynamic.py

Lines changed: 6 additions & 3 deletions
@@ -25,8 +25,9 @@ def test_no_padding(self):
         test_df = pd.DataFrame(data)
         ref_df = pd.DataFrame(data)
 
+        ref_date = datetime.strptime("2021-01-06", "%Y-%m-%d").date()
         new_ref_df = validator.pad_reference_api_df(
-            ref_df, test_df, datetime.strptime("2021-01-06", "%Y-%m-%d").date())
+            ref_df, test_df, ref_date, ref_date)
 
         assert new_ref_df.equals(ref_df)
 
@@ -42,8 +43,9 @@ def test_half_padding(self):
         ref_df = pd.DataFrame(ref_data)
         test_df = pd.DataFrame(test_data)
 
+        ref_date = datetime.strptime("2021-01-15", "%Y-%m-%d").date()
         new_ref_df = validator.pad_reference_api_df(
-            ref_df, test_df, datetime.strptime("2021-01-15", "%Y-%m-%d").date())
+            ref_df, test_df, ref_date, ref_date)
 
         # Check it only takes missing dates - so the last 5 dates
         assert new_ref_df.time_value.max() == datetime.strptime("2021-01-11",
@@ -64,8 +66,9 @@ def test_full_padding(self):
         ref_df = pd.DataFrame(ref_data)
         test_df = pd.DataFrame(test_data)
 
+        ref_date = datetime.strptime("2021-01-15", "%Y-%m-%d").date()
         new_ref_df = validator.pad_reference_api_df(
-            ref_df, test_df, datetime.strptime("2021-01-15", "%Y-%m-%d").date())
+            ref_df, test_df, ref_date, ref_date)
 
         # Check it only takes missing dates up to the day before the reference
         assert new_ref_df.time_value.max() == datetime.strptime("2021-01-15",

ansible/templates/dsew_community_profile-params-prod.json.j2

Lines changed: 2 additions & 2 deletions
@@ -11,7 +11,8 @@
             "doses administered",
             "booster doses administered",
             "fully vaccinated",
-            "booster dose since"
+            "booster dose since",
+            "positivity"
         ]
     },
     "validation": {
@@ -31,7 +32,6 @@
         "dynamic": {
             "ref_window_size": 7,
             "smoothed_signals": [
-                "naats_total_7dav",
                 "naats_positivity_7dav",
                 "confirmed_admissions_covid_1d_prop_7dav",
                 "confirmed_admissions_covid_1d_7dav",

ansible/templates/sir_complainsalot-params-prod.json.j2

Lines changed: 2 additions & 2 deletions
@@ -133,8 +133,8 @@
       "smoothed_vaccine_barrier_none_has", "smoothed_wvaccine_barrier_none_has",
       "smoothed_vaccine_barrier_appointment_location_has", "smoothed_wvaccine_barrier_appointment_location_has",
       "smoothed_vaccine_barrier_other_has", "smoothed_wvaccine_barrier_other_has",
-      ["smoothed_vaccine_barrier_appointment_location_tried", "county", "state"],
-      ["smoothed_vaccine_barrier_other_tried", "county", "state"]
+      ["smoothed_vaccine_barrier_appointment_location_tried", "county", "state"], ["smoothed_wvaccine_barrier_appointment_location_tried", "county", "state"],
+      ["smoothed_vaccine_barrier_other_tried", "county", "state"], ["smoothed_wvaccine_barrier_other_tried", "county", "state"]
     ]
   },
   "quidel": {

dsew_community_profile/delphi_dsew_community_profile/pull.py

Lines changed: 21 additions & 16 deletions
@@ -407,8 +407,11 @@ def as_cached_filename(params, config):
 
 def fetch_listing(params):
     """Generate the list of report files to process."""
-    listing = requests.get(DOWNLOAD_LISTING).json()['metadata']['attachments']
+    export_start_date = params['indicator'].get(
+        'export_start_date', datetime.datetime.fromtimestamp(0).date()
+    )
 
+    listing = requests.get(DOWNLOAD_LISTING).json()['metadata']['attachments']
     # drop the pdf files
     listing = [
         dict(
@@ -418,33 +421,34 @@ def fetch_listing(params):
         )
         for el in listing if el['filename'].endswith("xlsx")
     ]
-    keep = []
+
+    def check_valid_publish_date(x):
+        return x['publish_date'] >= export_start_date
+
     if params['indicator']['reports'] == 'new':
         # drop files we already have in the input cache
-        keep = [el for el in listing if not os.path.exists(el['cached_filename'])]
+        keep = [
+            el for el in listing
+            if not os.path.exists(el['cached_filename']) and check_valid_publish_date(el)
+        ]
     elif params['indicator']['reports'].find("--") > 0:
         # drop files outside the specified publish-date range
         start_str, _, end_str = params['indicator']['reports'].partition("--")
         start_date = datetime.datetime.strptime(start_str, "%Y-%m-%d").date()
         end_date = datetime.datetime.strptime(end_str, "%Y-%m-%d").date()
         keep = [
             el for el in listing
-            if start_date <= el['publish_date'] <= end_date
+            if (start_date <= el['publish_date'] <= end_date) and check_valid_publish_date(el)
         ]
-
-    # reference date is guaranteed to be on or before publish date, so we can trim
-    # reports that are too early
-    if 'export_start_date' in params['indicator']:
+    elif params['indicator']['reports'] == 'all':
         keep = [
-            el for el in keep
-            if params['indicator']['export_start_date'] <= el['publish_date']
+            el for el in listing if check_valid_publish_date(el)
         ]
-        # can't do the same for export_end_date
+    else:
+        raise ValueError("params['indicator']['reports'] is set to" \
+            + f" {params['indicator']['reports']}, which isn't 'new', 'all', or a date range.")
 
-    # if we're only running on a subset, make sure we have enough data for interp
-    if keep:
-        keep = extend_listing_for_interp(keep, listing)
-    return keep if keep else listing
+    return extend_listing_for_interp(keep, listing)
 
 def extend_listing_for_interp(keep, listing):
     """Grab additional files from the full listing for interpolation if needed.
@@ -617,7 +621,8 @@ def interpolate_missing_values(dfs: DataDict) -> DataDict:
         # https://github.com/cmu-delphi/covidcast-indicators/issues/1576
         _, sig, _ = key
         if sig == "positivity":
-            interpolate_df[key] = df.set_index(["geo_id", "timestamp"]).sort_index().reset_index()
+            reindexed_group_df = df.set_index(["geo_id", "timestamp"]).sort_index().reset_index()
+            interpolate_df[key] = reindexed_group_df[~reindexed_group_df.val.isna()]
             continue
 
         geo_dfs = []
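
After this change, fetch_listing recognizes exactly three `reports` modes ('new', a 'YYYY-MM-DD--YYYY-MM-DD' publish-date range, and 'all'), each additionally filtered by `export_start_date`, and anything else raises a ValueError; the empty-`keep` fallback to the full listing is also gone. A condensed sketch of that dispatch (the `filter_reports` helper is hypothetical; the real function also downloads the listing, builds cached filenames, and drops non-xlsx files):

    import datetime
    import os

    def filter_reports(listing, reports, export_start_date=datetime.date(1970, 1, 1)):
        """Condensed sketch of fetch_listing()'s new filtering logic."""
        def check_valid_publish_date(el):
            return el["publish_date"] >= export_start_date

        if reports == "new":
            keep = [el for el in listing
                    if not os.path.exists(el["cached_filename"]) and check_valid_publish_date(el)]
        elif reports.find("--") > 0:
            start_str, _, end_str = reports.partition("--")
            start = datetime.datetime.strptime(start_str, "%Y-%m-%d").date()
            end = datetime.datetime.strptime(end_str, "%Y-%m-%d").date()
            keep = [el for el in listing
                    if start <= el["publish_date"] <= end and check_valid_publish_date(el)]
        elif reports == "all":
            keep = [el for el in listing if check_valid_publish_date(el)]
        else:
            raise ValueError(f"reports is {reports!r}, which isn't 'new', 'all', or a date range.")
        # the real function then returns extend_listing_for_interp(keep, listing)
        return keep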

dsew_community_profile/delphi_dsew_community_profile/run.py

Lines changed: 14 additions & 14 deletions
@@ -99,20 +99,20 @@ def replace_date_param(p):
         params['indicator']['reports'] = 'all'
         params['indicator']['export_signals'] = {sensor_names[key] for key in new_signals}
 
-        dfs = fetch_new_reports(params, logger)
-        for key, df in dfs.items():
-            (geo, sig, is_prop) = key
-            if sig not in params["indicator"]["export_signals"]:
-                continue
-            dates = create_export_csv(
-                df,
-                params['common']['export_dir'],
-                geo,
-                make_signal_name(sig, is_prop),
-                **export_params
-            )
-            if len(dates)>0:
-                run_stats.append((max(dates), len(dates)))
+    dfs = fetch_new_reports(params, logger)
+    for key, df in dfs.items():
+        (geo, sig, is_prop) = key
+        if sig not in params["indicator"]["export_signals"]:
+            continue
+        dates = create_export_csv(
+            df,
+            params['common']['export_dir'],
+            geo,
+            make_signal_name(sig, is_prop),
+            **export_params
+        )
+        if len(dates)>0:
+            run_stats.append((max(dates), len(dates)))
 
     ## log this indicator run
     elapsed_time_in_seconds = round(time.time() - start_time, 2)

dsew_community_profile/params.json.template

Lines changed: 0 additions & 2 deletions
@@ -10,7 +10,6 @@
         "export_end_date": null,
         "export_signals": [
             "confirmed covid-19 admissions",
-            "total",
             "positivity",
             "doses administered",
             "booster doses administered",
@@ -35,7 +34,6 @@
         "dynamic": {
             "ref_window_size": 7,
             "smoothed_signals": [
-                "naats_total_7dav",
                 "naats_positivity_7dav",
                 "confirmed_admissions_covid_1d_prop_7dav",
                 "confirmed_admissions_covid_1d_7dav",

dsew_community_profile/tests/test_pull.py

Lines changed: 5 additions & 0 deletions
@@ -569,6 +569,11 @@ def test_extend_listing(self):
                 [{"publish_date": date(2020, 1, 20)}, {"publish_date": date(2020, 1, 19)}],
                 [{"publish_date": date(2020, 1, 20)}, {"publish_date": date(2020, 1, 19)}, {"publish_date": date(2020, 1, 18)}]
             ),
+            # empty keep list
+            example(
+                [],
+                []
+            )
         ]
         for ex in examples:
             assert extend_listing_for_interp(ex.given, listing) == ex.expected, ex.given
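
The new case pins down behavior the fetch_listing rewrite now depends on: `keep` can legitimately be empty, and extend_listing_for_interp must pass it through rather than extend it from the full listing. A minimal check, assuming the package is installed and using a hypothetical listing:

    from datetime import date
    from delphi_dsew_community_profile.pull import extend_listing_for_interp

    # An empty keep list comes back empty instead of falling back to the
    # full listing, which fetch_listing no longer does either.
    listing = [{"publish_date": date(2020, 1, 20)}, {"publish_date": date(2020, 1, 19)}]
    assert extend_listing_for_interp([], listing) == []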

facebook/qsf-tools/static/CMU/item_replacement_map.csv

Lines changed: 2 additions & 0 deletions
@@ -1,4 +1,5 @@
 new_item,old_item
+A5,A2b
 B8,B5
 B9,B5
 B10,B5
@@ -24,6 +25,7 @@ C13c,C13a
 C14a,C14
 C17,C2
 C17a,C17
+C17b,C17a
 V2a,V2
 V3a,V3
 V4a_1,V4_1

sir_complainsalot/params.json.template

Lines changed: 2 additions & 2 deletions
@@ -133,8 +133,8 @@
       "smoothed_vaccine_barrier_none_has", "smoothed_wvaccine_barrier_none_has",
       "smoothed_vaccine_barrier_appointment_location_has", "smoothed_wvaccine_barrier_appointment_location_has",
       "smoothed_vaccine_barrier_other_has", "smoothed_wvaccine_barrier_other_has",
-      ["smoothed_vaccine_barrier_appointment_location_tried", "county", "state"],
-      ["smoothed_vaccine_barrier_other_tried", "county", "state"]
+      ["smoothed_vaccine_barrier_appointment_location_tried", "county", "state"], ["smoothed_wvaccine_barrier_appointment_location_tried", "county", "state"],
+      ["smoothed_vaccine_barrier_other_tried", "county", "state"], ["smoothed_wvaccine_barrier_other_tried", "county", "state"]
     ]
   },
   "quidel": {
