
Commit c4a57fc

Merge pull request #1256 from alexcoda/standardize-logging
Standardize logging
2 parents dd3b8f5 + b6361de commit c4a57fc

18 files changed: +117 -92 lines

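The changes below all follow one convention: each `run_module` builds a single structured logger, passes it down to any helper that used to `print`, and each message becomes a constant event string with the variable parts supplied as keyword arguments. Below is a minimal sketch of that convention; it uses `structlog` directly for illustration (the indicators obtain their logger from a shared delphi_utils helper whose exact signature is not shown in this diff), and the helper and signal names are hypothetical.

import structlog

def generate_and_export(geo_res, signal, logger):
    # Helpers receive the logger as a parameter instead of printing.
    # The message stays constant; context travels as keyword arguments.
    logger.info("Generating signal and exporting to CSV",
                geo_res=geo_res, signal=signal)

def run_module():
    logger = structlog.get_logger()          # one logger per pipeline run
    for geo_res in ("county", "state"):      # illustrative geo resolutions
        generate_and_export(geo_res, "example_signal", logger)

if __name__ == "__main__":
    run_module()

Keeping the event string constant makes log lines easy to aggregate and grep, while the keyword context stays machine-readable.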

combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py

Lines changed: 10 additions & 10 deletions
@@ -33,14 +33,6 @@
 covidcast.covidcast._ASYNC_CALL = True  # pylint: disable=protected-access


-def check_none_data_frame(data_frame, label, date_range):
-    """Log and return True when a data frame is None."""
-    if data_frame is None:
-        print(f"{label} completely unavailable in range {date_range}")
-        return True
-    return False
-
-
 def maybe_append(usa_facts, jhu):
     """
     Append dataframes if available, otherwise return USAFacts.
@@ -133,7 +125,7 @@ def get_updated_dates(signal, geo, date_range, issue_range=None, fetcher=covidca
         issues=issue_range
     )

-    if check_none_data_frame(usafacts_df, "USA-FACTS", date_range):
+    if usafacts_df is None:
         return None

     merged_df = merge_dfs_by_geos(usafacts_df, jhu_df, geo)
@@ -142,7 +134,8 @@ def get_updated_dates(signal, geo, date_range, issue_range=None, fetcher=covidca
     return unique_dates


-def combine_usafacts_and_jhu(signal, geo, date_range, issue_range=None, fetcher=covidcast.signal):
+def combine_usafacts_and_jhu(signal, geo, date_range, logger,
+                             issue_range=None, fetcher=covidcast.signal):
     """Add rows for PR from JHU signals to USA-FACTS signals.

     For hhs and nation, fetch the county `num` data so we can compute the proportions correctly
@@ -158,6 +151,7 @@ def combine_usafacts_and_jhu(signal, geo, date_range, issue_range=None, fetcher=

     # This occurs if the usafacts ~and the jhu query were empty
     if unique_dates is None:
+        logger.info("USA-FACTS completely unavailable for dates", date_range=date_range)
         return EMPTY_FRAME

     # Query only the represented window so that every geo is represented; a single window call is
@@ -329,9 +323,15 @@ def run_module(params):
                                     log_exceptions=params["common"].get("log_exceptions", True))

     for metric, geo_res, sensor_name, signal in variants:
+        logger.info("Generating signal and exporting to CSV",
+                    geo_res = geo_res,
+                    metric = metric,
+                    sensor = sensor_name,
+                    signal = signal)
         df = combine_usafacts_and_jhu(signal,
                                       geo_res,
                                       extend_raw_date_range(params, sensor_name),
+                                      logger,
                                       params['indicator']['issue_range'])
         df["timestamp"] = pd.to_datetime(df["timestamp"])
         start_date = pd.to_datetime(params['indicator']['export_start_date'])

combo_cases_and_deaths/tests/test_run.py

Lines changed: 8 additions & 6 deletions
@@ -1,4 +1,5 @@
 """Tests for running combo cases and deaths indicator."""
+import logging
 from datetime import date
 from itertools import product
 import os
@@ -17,6 +18,7 @@
                                                   COLUMN_MAPPING)
 from delphi_combo_cases_and_deaths.constants import METRICS, SMOOTH_TYPES, SENSORS

+TEST_LOGGER = logging.getLogger()

 def test_issue_dates():
     """The smoothed value for a particular date is computed from the raw
@@ -98,7 +100,7 @@ def make_mock(geo):
         ("1 1", 4, 1 if geo in ["nation", "hhs"] else 2),
         ("0 0", 2, 0)
     ]:
-        df = combine_usafacts_and_jhu("", geo, date_range, fetcher=mock_covidcast_signal)
+        df = combine_usafacts_and_jhu("", geo, date_range, TEST_LOGGER, fetcher=mock_covidcast_signal)
         assert df.size == expected_size * len(COLUMN_MAPPING), f"""
 Wrong number of rows in combined data frame for the number of available signals.

@@ -126,7 +128,7 @@ def test_multiple_issues(mock_covidcast_signal):
         }),
         None
     ] * 2
-    result = combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), fetcher=mock_covidcast_signal)
+    result = combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), logger=TEST_LOGGER, fetcher=mock_covidcast_signal)
     pd.testing.assert_frame_equal(
         result,
         pd.DataFrame(
@@ -186,23 +188,23 @@ def test_combine_usafacts_and_jhu_special_geos(mock_covidcast_signal):
     ] * 6  # each call to combine_usafacts_and_jhu makes (2 + 2 * len(unique_timestamps)) = 12 calls to the fetcher

     pd.testing.assert_frame_equal(
-        combine_usafacts_and_jhu("confirmed_incidence_num", "nation", date_range=(0, 1), fetcher=mock_covidcast_signal),
+        combine_usafacts_and_jhu("confirmed_incidence_num", "nation", date_range=(0, 1), logger=TEST_LOGGER, fetcher=mock_covidcast_signal),
         pd.DataFrame({"timestamp": [20200101],
                       "geo_id": ["us"],
                       "val": [50 + 100 + 200],
                       "se": [None],
                       "sample_size": [None]})
     )
     pd.testing.assert_frame_equal(
-        combine_usafacts_and_jhu("confirmed_incidence_prop", "nation", date_range=(0, 1), fetcher=mock_covidcast_signal),
+        combine_usafacts_and_jhu("confirmed_incidence_prop", "nation", date_range=(0, 1), logger=TEST_LOGGER, fetcher=mock_covidcast_signal),
         pd.DataFrame({"timestamp": [20200101],
                       "geo_id": ["us"],
                       "val": [(50 + 100 + 200) / (4903185 + 3723066) * 100000],
                       "se": [None],
                       "sample_size": [None]})
     )
     pd.testing.assert_frame_equal(
-        combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), fetcher=mock_covidcast_signal),
+        combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), logger=TEST_LOGGER, fetcher=mock_covidcast_signal),
         pd.DataFrame({"geo_id": ["01000", "01001", "72001"],
                       "val": [50, 100, 200],
                       "timestamp": [20200101, 20200101, 20200101]},
@@ -229,7 +231,7 @@ def test_no_nation_jhu(mock_covidcast_signal):
                       "value": [1],
                       "timestamp": [20200101]})
     ]
-    result = combine_usafacts_and_jhu("_num", "nation", date_range=(0, 1), fetcher=mock_covidcast_signal)
+    result = combine_usafacts_and_jhu("_num", "nation", date_range=(0, 1), logger=TEST_LOGGER, fetcher=mock_covidcast_signal)

     assert mock_covidcast_signal.call_args_list[-1] == call(
         "jhu-csse",

covid_act_now/delphi_covid_act_now/run.py

Lines changed: 4 additions & 3 deletions
@@ -45,7 +45,7 @@ def run_module(params):
     parquet_url = params["indicator"]["parquet_url"]

     # Load CAN county-level testing data
-    print("Pulling CAN data")
+    logger.info("Pulling CAN data")
     df_pq = load_data(parquet_url)
     df_county_testing = extract_testing_metrics(df_pq)

@@ -54,7 +54,8 @@
     max_dates_exported = []
     # Perform geo aggregations and export to receiving
     for geo_res in GEO_RESOLUTIONS:
-        print(f"Processing {geo_res}")
+        logger.info("Generating signal and exporting to CSV",
+                    geo_res = geo_res)
         df = geo_map(df_county_testing, geo_res)

         # Export 'pcr_specimen_positivity_rate'
@@ -79,7 +80,7 @@
         max_dates_exported.append(latest)
         # x2 to count both positivity and tests signals
         num_exported_files += exported_csv_dates.size * 2
-        print(f"Exported dates: {earliest} to {latest}")
+        logger.info("Exported for dates between", earliest=earliest, latest=latest)

     elapsed_time_in_seconds = round(time.time() - start_time, 2)
     max_lag_in_days = (datetime.now() - min(max_dates_exported)).days

hhs_facilities/delphi_hhs_facilities/run.py

Lines changed: 3 additions & 0 deletions
@@ -36,6 +36,9 @@ def run_module(params) -> None:
     filled_fips_df = fill_missing_fips(raw_df, gmpr)
     stats = []
     for geo, (sig_name, sig_cols, sig_func, sig_offset) in product(GEO_RESOLUTIONS, SIGNALS):
+        logger.info("Generating signal and exporting to CSV",
+                    geo_res = geo,
+                    signal_name = sig_name)
         mapped_df = convert_geo(filled_fips_df, geo, gmpr)
         output_df = generate_signal(mapped_df, sig_cols, sig_func, sig_offset)
         dates = create_export_csv(output_df, params["common"]["export_dir"], geo, sig_name)

hhs_hosp/delphi_hhs/run.py

Lines changed: 4 additions & 0 deletions
@@ -105,6 +105,10 @@ def run_module(params):
     geo_mapper = GeoMapper()
     stats = []
     for sensor, smoother, geo in product(SIGNALS, SMOOTHERS, GEOS):
+        logger.info("Generating signal and exporting to CSV",
+                    geo_res = geo,
+                    sensor = sensor,
+                    smoother = smoother)
         df = geo_mapper.add_geocode(make_signal(all_columns, sensor),
                                     "state_id",
                                     "state_code",

nchs_mortality/delphi_nchs_mortality/archive_diffs.py

Lines changed: 5 additions & 3 deletions
@@ -8,7 +8,7 @@

 from delphi_utils import S3ArchiveDiffer

-def arch_diffs(params, daily_arch_diff):
+def arch_diffs(params, daily_arch_diff, logger):
     """
     Archive differences between new updates and existing data.

@@ -23,6 +23,8 @@ def arch_diffs(params, daily_arch_diff):
         Read from params.json
     daily_arch_diff: S3ArchiveDiffer
         Used to store and update cache
+    logger: logging.Logger
+        The structured logger.
     """
     weekly_export_dir = params["common"]["weekly_export_dir"]
     daily_export_dir = params["common"]["daily_export_dir"]
@@ -59,7 +61,7 @@

     # Report failures: someone should probably look at them
     for exported_file in fails:
-        print(f"Failed to archive (weekly) '{exported_file}'")
+        logger.info("Failed to archive (weekly)", filename=exported_file)

     # Daily run of archiving utility
     # - Uploads changed files to S3
@@ -83,4 +85,4 @@

     # Report failures: someone should probably look at them
     for exported_file in fails:
-        print(f"Failed to archive (daily) '{exported_file}'")
+        logger.info("Failed to archive (daily)", filename=exported_file)

nchs_mortality/delphi_nchs_mortality/run.py

Lines changed: 6 additions & 3 deletions
@@ -62,7 +62,8 @@ def run_module(params: Dict[str, Any]):
     df_pull = pull_nchs_mortality_data(token, test_file)
     for metric in METRICS:
         if metric == 'percent_of_expected_deaths':
-            print(metric)
+            logger.info("Generating signal and exporting to CSV",
+                        metric = metric)
             df = df_pull.copy()
             df["val"] = df[metric]
             df["se"] = np.nan
@@ -80,7 +81,9 @@
             stats.append((max(dates), len(dates)))
         else:
             for sensor in SENSORS:
-                print(metric, sensor)
+                logger.info("Generating signal and exporting to CSV",
+                            metric = metric,
+                            sensor = sensor)
                 df = df_pull.copy()
                 if sensor == "num":
                     df["val"] = df[metric]
@@ -107,7 +110,7 @@
     # - Uploads changed files to S3
     # - Does not export any issues into receiving
     if "archive" in params:
-        arch_diffs(params, daily_arch_diff)
+        arch_diffs(params, daily_arch_diff, logger)

     elapsed_time_in_seconds = round(time.time() - start_time, 2)
     min_max_date = stats and min(s[0] for s in stats)

quidel/delphi_quidel/data_tools.py

Lines changed: 4 additions & 8 deletions
@@ -86,8 +86,6 @@ def _geographical_pooling(tpooled_tests, tpooled_ptests, min_obs, max_borrow_obs
         Same length as tests; proportion of parent observations to borrow.
     """
     if (np.any(np.isnan(tpooled_tests)) or np.any(np.isnan(tpooled_ptests))):
-        print(tpooled_tests)
-        print(tpooled_ptests)
         raise ValueError('[parent] tests should be non-negative '
                          'with no np.nan')
     if max_borrow_obs > min_obs:
@@ -153,7 +151,6 @@ def raw_positive_prop(positives, tests, min_obs):
     positives = positives.astype(float)
     tests = tests.astype(float)
     if np.any(np.isnan(positives)) or np.any(np.isnan(tests)):
-        print(positives, tests)
         raise ValueError('positives and tests should be non-negative '
                          'with no np.nan')
     if np.any(positives > tests):
@@ -290,11 +287,10 @@ def raw_tests_per_device(devices, tests, min_obs):
     """
     devices = devices.astype(float)
     tests = tests.astype(float)
-    if (np.any(np.isnan(devices)) or np.any(np.isnan(tests))):
-        print(devices)
-        print(tests)
-        raise ValueError('devices and tests should be non-negative '
-                         'with no np.nan')
+    if np.any(np.isnan(devices)) or np.any(devices < 0):
+        raise ValueError("devices should be non-negative with no np.nan")
+    if np.any(np.isnan(tests)) or np.any(tests < 0):
+        raise ValueError("tests should be non-negative with no np.nan")
     if min_obs <= 0:
         raise ValueError('min_obs should be positive')
     tests[tests < min_obs] = np.nan

quidel/delphi_quidel/pull.py

Lines changed: 17 additions & 17 deletions
@@ -82,7 +82,7 @@ def regulate_column_names(df, test_type):
     return df

 def get_from_email(column_names, start_dates, end_dates, mail_server,
-                   account, sender, password):
+                   account, sender, password, logger):
     """
     Get raw data from email account.

@@ -98,6 +98,8 @@ def get_from_email(column_names, start_dates, end_dates, mail_server,
         email account of the sender
     password: str
         password of the datadrop email
+    logger: logging.Logger
+        The structured logger.

     Returns:
         df: pd.DataFrame
@@ -131,7 +133,7 @@ def get_from_email(column_names, start_dates, end_dates, mail_server,
             if not whether_in_range:
                 continue

-            print(f"Pulling {test} data received on %s"%search_date.date())
+            logger.info("Pulling data", test=test, date=search_date.date())
             toread = io.BytesIO()
             toread.write(att.payload)
             toread.seek(0)  # reset the pointer
@@ -153,10 +155,9 @@ def fix_zipcode(df):
             zipcode = int(float(zipcode))
         zipcode5.append(zipcode)
     df['zip'] = zipcode5
-    # print('Fixing %.2f %% of the data' % (fixnum * 100 / len(zipcode5)))
     return df

-def fix_date(df):
+def fix_date(df, logger):
     """
     Remove invalid dates and select correct test date to use.

@@ -175,16 +176,16 @@ def fix_date(df):
     df.insert(2, "timestamp", df["TestDate"])

     mask = df["TestDate"] <= df["StorageDate"]
-    print("Removing %.2f%% of unusual data" % ((len(df) - np.sum(mask)) * 100 / len(df)))
+    logger.info(f"Removing {((len(df) - np.sum(mask)) * 100 / len(df)):.2f}% of unusual data")
     df = df[mask]

     mask = df["StorageDate"] - df["TestDate"] > pd.Timedelta(days=90)
-    print("Fixing %.2f%% of outdated data" % (np.sum(mask) * 100 / len(df)))
+    logger.info(f"Fixing {(np.sum(mask) * 100 / len(df)):.2f}% of outdated data")
     df["timestamp"].values[mask] = df["StorageDate"].values[mask]
     return df

 def preprocess_new_data(start_dates, end_dates, mail_server, account,
-                        sender, password, test_mode):
+                        sender, password, test_mode, logger):
     """
     Pull and pre-process Quidel Antigen Test data from datadrop email.

@@ -206,6 +207,8 @@ def preprocess_new_data(start_dates, end_dates, mail_server, account,
         password of the datadrop email
     test_mode: bool
         pull raw data from email or not
+    logger: logging.Logger
+        The structured logger.
     Returns:
         df: pd.DataFrame
         time_flag: datetime.date:
@@ -220,21 +223,20 @@
     else:
         # Get new data from email
         dfs, time_flag = get_from_email(COLUMN_NAMES, start_dates, end_dates,
-                                        mail_server, account, sender, password)
+                                        mail_server, account, sender, password, logger)

     # No new data can be pulled
     if time_flag is None:
         return dfs, time_flag

     df_finals = {}
     for test_type in TEST_TYPES:
-        print(f"For {test_type}:")
+        logger.info(f"For {test_type}:")
         df = dfs[test_type]
         # Fix some of the fipcodes that are 9 digit instead of 5 digit
         df = fix_zipcode(df)
         # Create a column CanonicalDate according to StarageDate and TestDate
-        df = fix_date(df)
-
+        df = fix_date(df, logger)
         # Compute numUniqueDevices
         numUniqueDevices = df.groupby(
             by=["timestamp", "zip"],
@@ -309,17 +311,15 @@ def check_intermediate_file(cache_dir, pull_start_dates):
             sep=",", parse_dates=["timestamp"])
     return previous_dfs, pull_start_dates

-def pull_quidel_data(params):
+def pull_quidel_data(params, logger):
     """
     Pull new quidel test data and decide whether to combine it with historical records in ./cache.

     Parameters:
     params: dict
         including all the information read from params.json
-    END_FROM_TODAY_MINUS: int
-        report data until - X days
-    EXPORT_DAY_RANGE: int
-        number of dates to report
+    logger: logging.Logger
+        The structured logger.

     Returns:
         DataFrame:
@@ -355,7 +355,7 @@ def pull_quidel_data(params):
     # Use _end_date to check the most recent date that we received data
     dfs, _end_date = preprocess_new_data(
         pull_start_dates, pull_end_dates, mail_server,
-        account, sender, password, test_mode)
+        account, sender, password, test_mode, logger)

     # Utilize previously stored data
     for test_type in TEST_TYPES:
