From efd9423e9e550e8bda38c0219c4670b4a43d11a5 Mon Sep 17 00:00:00 2001 From: alexcoda Date: Sun, 19 Sep 2021 11:35:05 -0700 Subject: [PATCH 01/12] Fix value check in quidel data_tools --- quidel/delphi_quidel/data_tools.py | 9 ++++----- quidel_covidtest/delphi_quidel_covidtest/data_tools.py | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/quidel/delphi_quidel/data_tools.py b/quidel/delphi_quidel/data_tools.py index 5d67dd812..6f9cb45c9 100644 --- a/quidel/delphi_quidel/data_tools.py +++ b/quidel/delphi_quidel/data_tools.py @@ -290,11 +290,10 @@ def raw_tests_per_device(devices, tests, min_obs): """ devices = devices.astype(float) tests = tests.astype(float) - if (np.any(np.isnan(devices)) or np.any(np.isnan(tests))): - print(devices) - print(tests) - raise ValueError('devices and tests should be non-negative ' - 'with no np.nan') + if np.any(np.isnan(devices)) or np.any(d < 0 for d in devices): + raise ValueError("devices should be non-negative with no np.nan") + if np.any(np.isnan(tests)) or np.any(d < 0 for d in tests): + raise ValueError("tests should be non-negative with no np.nan") if min_obs <= 0: raise ValueError('min_obs should be positive') tests[tests < min_obs] = np.nan diff --git a/quidel_covidtest/delphi_quidel_covidtest/data_tools.py b/quidel_covidtest/delphi_quidel_covidtest/data_tools.py index 18898ec8e..54995dc90 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/data_tools.py +++ b/quidel_covidtest/delphi_quidel_covidtest/data_tools.py @@ -296,11 +296,10 @@ def raw_tests_per_device(devices, tests, min_obs): """ devices = devices.astype(float) tests = tests.astype(float) - if (np.any(np.isnan(devices)) or np.any(np.isnan(tests))): - print(devices) - print(tests) - raise ValueError('devices and tests should be non-negative ' - 'with no np.nan') + if np.any(np.isnan(devices)) or np.any(d < 0 for d in devices): + raise ValueError("devices should be non-negative with no np.nan") + if np.any(np.isnan(tests)) or np.any(d < 0 for d in tests): + raise ValueError("tests should be non-negative with no np.nan") if min_obs <= 0: raise ValueError('min_obs should be positive') tests[tests < min_obs] = np.nan From 77f26f7fefb05e4ed4fb536e2016a15639c2f97c Mon Sep 17 00:00:00 2001 From: alexcoda Date: Sun, 19 Sep 2021 11:38:24 -0700 Subject: [PATCH 02/12] Replace print statements with logging And add log statements for data export variants --- .../delphi_combo_cases_and_deaths/run.py | 19 +++++------ covid_act_now/delphi_covid_act_now/run.py | 7 ++-- hhs_facilities/delphi_hhs_facilities/run.py | 3 ++ hhs_hosp/delphi_hhs/run.py | 4 +++ .../delphi_nchs_mortality/archive_diffs.py | 8 +++-- nchs_mortality/delphi_nchs_mortality/run.py | 7 ++-- quidel/delphi_quidel/data_tools.py | 3 -- quidel/delphi_quidel/pull.py | 34 +++++++++---------- quidel/delphi_quidel/run.py | 9 ++--- .../delphi_quidel_covidtest/data_tools.py | 3 -- .../delphi_quidel_covidtest/pull.py | 31 +++++++++-------- .../delphi_quidel_covidtest/run.py | 12 ++++--- .../delphi_safegraph_patterns/process.py | 16 ++++++--- .../delphi_safegraph_patterns/run.py | 3 +- usafacts/delphi_usafacts/run.py | 2 +- 15 files changed, 90 insertions(+), 71 deletions(-) diff --git a/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py b/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py index c54f3f3be..bddd1833f 100755 --- a/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py +++ b/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py @@ -33,14 +33,6 @@ covidcast.covidcast._ASYNC_CALL = 
True # pylint: disable=protected-access -def check_none_data_frame(data_frame, label, date_range): - """Log and return True when a data frame is None.""" - if data_frame is None: - print(f"{label} completely unavailable in range {date_range}") - return True - return False - - def maybe_append(usa_facts, jhu): """ Append dataframes if available, otherwise return USAFacts. @@ -133,7 +125,7 @@ def get_updated_dates(signal, geo, date_range, issue_range=None, fetcher=covidca issues=issue_range ) - if check_none_data_frame(usafacts_df, "USA-FACTS", date_range): + if usafacts_df is None: return None merged_df = merge_dfs_by_geos(usafacts_df, jhu_df, geo) @@ -142,7 +134,7 @@ def get_updated_dates(signal, geo, date_range, issue_range=None, fetcher=covidca return unique_dates -def combine_usafacts_and_jhu(signal, geo, date_range, issue_range=None, fetcher=covidcast.signal): +def combine_usafacts_and_jhu(signal, geo, date_range, logger, issue_range=None, fetcher=covidcast.signal): """Add rows for PR from JHU signals to USA-FACTS signals. For hhs and nation, fetch the county `num` data so we can compute the proportions correctly @@ -158,6 +150,7 @@ def combine_usafacts_and_jhu(signal, geo, date_range, issue_range=None, fetcher= # This occurs if the usafacts ~and the jhu query were empty if unique_dates is None: + logger.info("USA-FACTS completely unavailable for dates", date_range=date_range) return EMPTY_FRAME # Query only the represented window so that every geo is represented; a single window call is @@ -329,9 +322,15 @@ def run_module(params): log_exceptions=params["common"].get("log_exceptions", True)) for metric, geo_res, sensor_name, signal in variants: + logger.info("Generating signal and exporting to CSV", + geo_res = geo_res, + metric = metric, + sensor = sensor_name, + signal = signal) df = combine_usafacts_and_jhu(signal, geo_res, extend_raw_date_range(params, sensor_name), + logger, params['indicator']['issue_range']) df["timestamp"] = pd.to_datetime(df["timestamp"]) start_date = pd.to_datetime(params['indicator']['export_start_date']) diff --git a/covid_act_now/delphi_covid_act_now/run.py b/covid_act_now/delphi_covid_act_now/run.py index d9d983f0d..7cc96f6e4 100644 --- a/covid_act_now/delphi_covid_act_now/run.py +++ b/covid_act_now/delphi_covid_act_now/run.py @@ -45,7 +45,7 @@ def run_module(params): parquet_url = params["indicator"]["parquet_url"] # Load CAN county-level testing data - print("Pulling CAN data") + logger.info("Pulling CAN data") df_pq = load_data(parquet_url) df_county_testing = extract_testing_metrics(df_pq) @@ -54,7 +54,8 @@ def run_module(params): max_dates_exported = [] # Perform geo aggregations and export to receiving for geo_res in GEO_RESOLUTIONS: - print(f"Processing {geo_res}") + logger.info("Generating signal and exporting to CSV", + geo_res = geo_res) df = geo_map(df_county_testing, geo_res) # Export 'pcr_specimen_positivity_rate' @@ -79,7 +80,7 @@ def run_module(params): max_dates_exported.append(latest) # x2 to count both positivity and tests signals num_exported_files += exported_csv_dates.size * 2 - print(f"Exported dates: {earliest} to {latest}") + logger.info("Exported for dates between", earliest=earliest, latest=latest) elapsed_time_in_seconds = round(time.time() - start_time, 2) max_lag_in_days = (datetime.now() - min(max_dates_exported)).days diff --git a/hhs_facilities/delphi_hhs_facilities/run.py b/hhs_facilities/delphi_hhs_facilities/run.py index b41df5bcc..43d3a9bdd 100644 --- a/hhs_facilities/delphi_hhs_facilities/run.py +++ 
b/hhs_facilities/delphi_hhs_facilities/run.py @@ -36,6 +36,9 @@ def run_module(params) -> None: filled_fips_df = fill_missing_fips(raw_df, gmpr) stats = [] for geo, (sig_name, sig_cols, sig_func, sig_offset) in product(GEO_RESOLUTIONS, SIGNALS): + logger.info("Generating signal and exporting to CSV", + geo_res = geo, + signal_name = sig_name) mapped_df = convert_geo(filled_fips_df, geo, gmpr) output_df = generate_signal(mapped_df, sig_cols, sig_func, sig_offset) dates = create_export_csv(output_df, params["common"]["export_dir"], geo, sig_name) diff --git a/hhs_hosp/delphi_hhs/run.py b/hhs_hosp/delphi_hhs/run.py index 6af654845..45c2f5bc1 100644 --- a/hhs_hosp/delphi_hhs/run.py +++ b/hhs_hosp/delphi_hhs/run.py @@ -105,6 +105,10 @@ def run_module(params): geo_mapper = GeoMapper() stats = [] for sensor, smoother, geo in product(SIGNALS, SMOOTHERS, GEOS): + logger.info("Generating signal and exporting to CSV", + geo_res = geo, + sensor = sensor, + smoother = smoother) df = geo_mapper.add_geocode(make_signal(all_columns, sensor), "state_id", "state_code", diff --git a/nchs_mortality/delphi_nchs_mortality/archive_diffs.py b/nchs_mortality/delphi_nchs_mortality/archive_diffs.py index 6524203b3..e8b790cee 100644 --- a/nchs_mortality/delphi_nchs_mortality/archive_diffs.py +++ b/nchs_mortality/delphi_nchs_mortality/archive_diffs.py @@ -8,7 +8,7 @@ from delphi_utils import S3ArchiveDiffer -def arch_diffs(params, daily_arch_diff): +def arch_diffs(params, daily_arch_diff, logger): """ Archive differences between new updates and existing data. @@ -23,6 +23,8 @@ Read from params.json daily_arch_diff: S3ArchiveDiffer Used to store and update cache + logger: logging.Logger + The structured logger. """ weekly_export_dir = params["common"]["weekly_export_dir"] daily_export_dir = params["common"]["daily_export_dir"] @@ -59,7 +61,7 @@ # Report failures: someone should probably look at them for exported_file in fails: - print(f"Failed to archive (weekly) '{exported_file}'") + logger.info("Failed to archive (weekly)", filename=exported_file) # Daily run of archiving utility # - Uploads changed files to S3 @@ -83,4 +85,4 @@ # Report failures: someone should probably look at them for exported_file in fails: - print(f"Failed to archive (daily) '{exported_file}'") + logger.info("Failed to archive (daily)", filename=exported_file) diff --git a/nchs_mortality/delphi_nchs_mortality/run.py b/nchs_mortality/delphi_nchs_mortality/run.py index fa0226fcb..1673e79c1 100644 --- a/nchs_mortality/delphi_nchs_mortality/run.py +++ b/nchs_mortality/delphi_nchs_mortality/run.py @@ -62,7 +62,8 @@ def run_module(params: Dict[str, Any]): df_pull = pull_nchs_mortality_data(token, test_file) for metric in METRICS: if metric == 'percent_of_expected_deaths': - print(metric) + logger.info("Generating signal and exporting to CSV", + metric = metric) df = df_pull.copy() df["val"] = df[metric] df["se"] = np.nan @@ -80,7 +81,9 @@ else: for sensor in SENSORS: - print(metric, sensor) + logger.info("Generating signal and exporting to CSV", + metric = metric, + sensor = sensor) df = df_pull.copy() if sensor == "num": df["val"] = df[metric] diff --git a/quidel/delphi_quidel/data_tools.py b/quidel/delphi_quidel/data_tools.py index 6f9cb45c9..c0ebeb750 100644 --- a/quidel/delphi_quidel/data_tools.py +++ b/quidel/delphi_quidel/data_tools.py @@ 
-86,8 +86,6 @@ def _geographical_pooling(tpooled_tests, tpooled_ptests, min_obs, max_borrow_obs Same length as tests; proportion of parent observations to borrow. """ if (np.any(np.isnan(tpooled_tests)) or np.any(np.isnan(tpooled_ptests))): - print(tpooled_tests) - print(tpooled_ptests) raise ValueError('[parent] tests should be non-negative ' 'with no np.nan') if max_borrow_obs > min_obs: @@ -153,7 +151,6 @@ def raw_positive_prop(positives, tests, min_obs): positives = positives.astype(float) tests = tests.astype(float) if np.any(np.isnan(positives)) or np.any(np.isnan(tests)): - print(positives, tests) raise ValueError('positives and tests should be non-negative ' 'with no np.nan') if np.any(positives > tests): diff --git a/quidel/delphi_quidel/pull.py b/quidel/delphi_quidel/pull.py index 6132ffe40..3a0c6f25f 100644 --- a/quidel/delphi_quidel/pull.py +++ b/quidel/delphi_quidel/pull.py @@ -82,7 +82,7 @@ def regulate_column_names(df, test_type): return df def get_from_email(column_names, start_dates, end_dates, mail_server, - account, sender, password): + account, sender, password, logger): """ Get raw data from email account. @@ -98,6 +98,8 @@ def get_from_email(column_names, start_dates, end_dates, mail_server, email account of the sender password: str password of the datadrop email + logger: logging.Logger + The structured logger. Returns: df: pd.DataFrame @@ -131,7 +133,7 @@ def get_from_email(column_names, start_dates, end_dates, mail_server, if not whether_in_range: continue - print(f"Pulling {test} data received on %s"%search_date.date()) + logger.info(f"Pulling data", test=test, date=search_date.date()) toread = io.BytesIO() toread.write(att.payload) toread.seek(0) # reset the pointer @@ -153,10 +155,9 @@ def fix_zipcode(df): zipcode = int(float(zipcode)) zipcode5.append(zipcode) df['zip'] = zipcode5 - # print('Fixing %.2f %% of the data' % (fixnum * 100 / len(zipcode5))) return df -def fix_date(df): +def fix_date(df, logger): """ Remove invalid dates and select correct test date to use. @@ -175,16 +176,16 @@ def fix_date(df): df.insert(2, "timestamp", df["TestDate"]) mask = df["TestDate"] <= df["StorageDate"] - print("Removing %.2f%% of unusual data" % ((len(df) - np.sum(mask)) * 100 / len(df))) + logger.info(f"Removing {((len(df) - np.sum(mask)) * 100 / len(df)):.2f} of unusual data") df = df[mask] mask = df["StorageDate"] - df["TestDate"] > pd.Timedelta(days=90) - print("Fixing %.2f%% of outdated data" % (np.sum(mask) * 100 / len(df))) + logger.info(f"Fixing {(np.sum(mask) * 100 / len(df)):.2f} of outdated data") df["timestamp"].values[mask] = df["StorageDate"].values[mask] return df def preprocess_new_data(start_dates, end_dates, mail_server, account, - sender, password, test_mode): + sender, password, test_mode, logger): """ Pull and pre-process Quidel Antigen Test data from datadrop email. @@ -206,6 +207,8 @@ def preprocess_new_data(start_dates, end_dates, mail_server, account, password of the datadrop email test_mode: bool pull raw data from email or not + logger: logging.Logger + The structured logger. 
Returns: df: pd.DataFrame time_flag: datetime.date: @@ -220,7 +223,7 @@ else: # Get new data from email dfs, time_flag = get_from_email(COLUMN_NAMES, start_dates, end_dates, - mail_server, account, sender, password) + mail_server, account, sender, password, logger) # No new data can be pulled if time_flag is None: @@ -228,13 +231,12 @@ df_finals = {} for test_type in TEST_TYPES: - print(f"For {test_type}:") + logger.info("Processing data", test_type=test_type) df = dfs[test_type] # Fix some of the fipcodes that are 9 digit instead of 5 digit df = fix_zipcode(df) # Create a column CanonicalDate according to StarageDate and TestDate - df = fix_date(df) - + df = fix_date(df, logger) # Compute numUniqueDevices numUniqueDevices = df.groupby( by=["timestamp", "zip"], @@ -309,17 +311,15 @@ def check_intermediate_file(cache_dir, pull_start_dates): sep=",", parse_dates=["timestamp"]) return previous_dfs, pull_start_dates -def pull_quidel_data(params): +def pull_quidel_data(params, logger): """ Pull new quidel test data and decide whether to combine it with historical records in ./cache. Parameters: params: dict including all the information read from params.json + logger: logging.Logger + The structured logger. Returns: DataFrame: @@ -355,7 +355,7 @@ # Use _end_date to check the most recent date that we received data dfs, _end_date = preprocess_new_data( pull_start_dates, pull_end_dates, mail_server, - account, sender, password, test_mode) + account, sender, password, test_mode, logger) # Utilize previously stored data for test_type in TEST_TYPES: diff --git a/quidel/delphi_quidel/run.py b/quidel/delphi_quidel/run.py index 49f6ec66b..cd83d746a 100644 --- a/quidel/delphi_quidel/run.py +++ b/quidel/delphi_quidel/run.py @@ -63,9 +63,9 @@ def run_module(params: Dict[str, Any]): ) # Pull data and update export date - dfs, _end_date = pull_quidel_data(params["indicator"]) + dfs, _end_date = pull_quidel_data(params["indicator"], logger) if _end_date is None: - print("The data is up-to-date. Currently, no new data to be ingested.") + logger.info("The data is up-to-date. 
Currently, no new data to be ingested.") return export_end_dates = check_export_end_date(export_end_dates, _end_date, END_FROM_TODAY_MINUS) @@ -81,7 +81,6 @@ def run_module(params: Dict[str, Any]): for sensor in sensors: # Check either covid_ag or flu_ag test_type = "covid_ag" if "covid_ag" in sensor else "flu_ag" - print("state", sensor) data = dfs[test_type].copy() state_groups = geo_map("state", data, map_df).groupby("state_id") first_date, last_date = data["timestamp"].min(), data["timestamp"].max() @@ -97,7 +96,9 @@ def run_module(params: Dict[str, Any]): # County/HRR/MSA level for geo_res in GEO_RESOLUTIONS: - print(geo_res, sensor) + logger.info("Generating signal and exporting to CSV", + geo_res = geo_res, + sensor = sensor) data = dfs[test_type].copy() data, res_key = geo_map(geo_res, data, map_df) res_df = generate_sensor_for_other_geores( diff --git a/quidel_covidtest/delphi_quidel_covidtest/data_tools.py b/quidel_covidtest/delphi_quidel_covidtest/data_tools.py index 54995dc90..fac0b58b2 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/data_tools.py +++ b/quidel_covidtest/delphi_quidel_covidtest/data_tools.py @@ -92,8 +92,6 @@ def _geographical_pooling(tpooled_tests, tpooled_ptests, min_obs): """ if (np.any(np.isnan(tpooled_tests)) or np.any(np.isnan(tpooled_ptests))): - print(tpooled_tests) - print(tpooled_ptests) raise ValueError('[parent] tests should be non-negative ' 'with no np.nan') # STEP 1: "TOP UP" USING PARENT LOCATION @@ -156,7 +154,6 @@ def raw_positive_prop(positives, tests, min_obs): positives = positives.astype(float) tests = tests.astype(float) if np.any(np.isnan(positives)) or np.any(np.isnan(tests)): - print(positives, tests) raise ValueError('positives and tests should be non-negative ' 'with no np.nan') if np.any(positives > tests): diff --git a/quidel_covidtest/delphi_quidel_covidtest/pull.py b/quidel_covidtest/delphi_quidel_covidtest/pull.py index fe042ed38..9ce036e10 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/pull.py +++ b/quidel_covidtest/delphi_quidel_covidtest/pull.py @@ -8,7 +8,7 @@ import pandas as pd import numpy as np -def get_from_s3(start_date, end_date, bucket): +def get_from_s3(start_date, end_date, bucket, logger): """ Get raw data from aws s3 bucket. @@ -19,6 +19,8 @@ def get_from_s3(start_date, end_date, bucket): pull data from file tagged with date on/before the end date bucket: s3.Bucket the aws s3 bucket that stores quidel data + logger: logging.Logger + The structured logger. output: df: pd.DataFrame time_flag: datetime.datetime @@ -49,7 +51,7 @@ def get_from_s3(start_date, end_date, bucket): for search_date in [start_date + timedelta(days=x) for x in range(n_days)]: if search_date in s3_files.keys(): # Avoid appending duplicate datasets - print("Pulling data received on %s"%search_date.date()) + logger.info(f"Pulling data received on {search_date.date()}") # Fetch data received on the same day for fn in s3_files[search_date]: @@ -76,10 +78,9 @@ def fix_zipcode(df): zipcode = int(float(zipcode)) zipcode5.append(zipcode) df['zip'] = zipcode5 - # print('Fixing %.2f %% of the data' % (fixnum * 100 / len(zipcode5))) return df -def fix_date(df): +def fix_date(df, logger): """ Remove invalid dates and select correct test date to use. 
@@ -98,15 +99,15 @@ def fix_date(df): df.insert(2, "timestamp", df["TestDate"]) mask = df["TestDate"] <= df["StorageDate"] - print("Removing %.2f%% of unusual data" % ((len(df) - np.sum(mask)) * 100 / len(df))) + logger.info(f"Removing {((len(df) - np.sum(mask)) * 100 / len(df)):.2f} of unusual data") df = df[mask] mask = df["StorageDate"] - df["TestDate"] > pd.Timedelta(days=90) - print("Fixing %.2f%% of outdated data" % (np.sum(mask) * 100 / len(df))) + logger.info(f"Fixing {(np.sum(mask) * 100 / len(df)):.2f} of outdated data") df["timestamp"].values[mask] = df["StorageDate"].values[mask] return df -def preprocess_new_data(start_date, end_date, params, test_mode): +def preprocess_new_data(start_date, end_date, params, test_mode, logger): """ Pull and pre-process Quidel Covid Test data. @@ -123,6 +124,8 @@ def preprocess_new_data(start_date, end_date, params, test_mode): read from params.json test_mode: bool pull raw data from s3 or not + logger: logging.Logger + The structured logger. output: df: pd.DataFrame time_flag: datetime.date: @@ -144,7 +147,7 @@ def preprocess_new_data(start_date, end_date, params, test_mode): aws_secret_access_key=aws_secret_access_key) bucket = s3.Bucket(bucket_name) # Get new data from s3 - df, time_flag = get_from_s3(start_date, end_date, bucket) + df, time_flag = get_from_s3(start_date, end_date, bucket, logger) # No new data can be pulled if time_flag is None: @@ -154,7 +157,7 @@ def preprocess_new_data(start_date, end_date, params, test_mode): df = fix_zipcode(df) # Create a column CanonicalDate according to StarageDate and TestDate - df = fix_date(df) + df = fix_date(df, logger) # Compute overallPositive overall_pos = df[df["OverallResult"] == "positive"].groupby( @@ -197,7 +200,7 @@ def check_intermediate_file(cache_dir, pull_start_date): return previous_df, pull_start_date return None, pull_start_date -def pull_quidel_covidtest(params): +def pull_quidel_covidtest(params, logger): """Pull the quidel covid test data. Conditionally merge new data with historical data from ./cache. @@ -205,10 +208,8 @@ def pull_quidel_covidtest(params): Parameters: params: dict including all the information read from params.json - end_from_today_minus: int - report data until - X days - export_day_range: int - number of dates to report + logger: logging.Logger + The structured logger. Returns: DataFrame: @@ -237,7 +238,7 @@ def pull_quidel_covidtest(params): # Pull data from the file at 5 digit zipcode level # Use _end_date to check the most recent date that we received data df, _end_date = preprocess_new_data( - pull_start_date, pull_end_date, params, test_mode) + pull_start_date, pull_end_date, params, test_mode, logger) # Utilize previously stored data if previous_df is not None: diff --git a/quidel_covidtest/delphi_quidel_covidtest/run.py b/quidel_covidtest/delphi_quidel_covidtest/run.py index d82f80135..5f084440c 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/run.py +++ b/quidel_covidtest/delphi_quidel_covidtest/run.py @@ -76,9 +76,9 @@ def run_module(params: Dict[str, Any]): export_day_range = params["indicator"]["export_day_range"] # Pull data and update export date - df, _end_date = pull_quidel_covidtest(params["indicator"]) + df, _end_date = pull_quidel_covidtest(params["indicator"], logger) if _end_date is None: - print("The data is up-to-date. Currently, no new data to be ingested.") + logger.info("The data is up-to-date. 
Currently, no new data to be ingested.") return export_end_date = check_export_end_date(export_end_date, _end_date, END_FROM_TODAY_MINUS) @@ -98,7 +98,9 @@ def run_module(params: Dict[str, Any]): geo_data, res_key = geo_map(geo_res, data) geo_groups = geo_data.groupby(res_key) for sensor in sensors: - print(geo_res, sensor) + logger.info("Generating signal and exporting to CSV", + geo_res=geo_res, + sensor=sensor) if sensor.endswith(SMOOTHED_POSITIVE): smoothers[sensor] = smoothers.pop(SMOOTHED_POSITIVE) elif sensor.endswith(RAW_POSITIVE): @@ -125,7 +127,9 @@ def run_module(params: Dict[str, Any]): for geo_res in PARENT_GEO_RESOLUTIONS: geo_data, res_key = geo_map(geo_res, data) for sensor in sensors: - print(geo_res, sensor) + logger.info("Generating signal and exporting to CSV", + geo_res=geo_res, + sensor=sensor) res_df = generate_sensor_for_parent_geo( geo_groups, geo_data, res_key, smooth=smoothers[sensor][1], device=smoothers[sensor][0], first_date=first_date, diff --git a/safegraph_patterns/delphi_safegraph_patterns/process.py b/safegraph_patterns/delphi_safegraph_patterns/process.py index 1445ce028..330cf6762 100644 --- a/safegraph_patterns/delphi_safegraph_patterns/process.py +++ b/safegraph_patterns/delphi_safegraph_patterns/process.py @@ -125,7 +125,7 @@ def aggregate(df, metric, geo_res): return df.rename({geo_key: "geo_id"}, axis=1) def process(fname, sensors, metrics, geo_resolutions, - export_dir, brand_df, stats): + export_dir, brand_df, stats, logger): """ Process an input census block group-level CSV and export it. @@ -135,16 +135,20 @@ def process(fname, sensors, metrics, geo_resolutions, ---------- fname: str Input filename. - metrics: List[Tuple[str, bool]] - List of (metric_name, wip). sensors: List[str] List of (sensor) + metrics: List[Tuple[str, bool]] + List of (metric_name, wip). geo_resolutions: List[str] List of geo resolutions to export the data. + export_dir: str + The directory to export files to. brand_df: pd.DataFrame mapping info from naics_code to safegraph_brand_id stats: List[Tuple[datetime, int]] List to which we will add (max export date, number of export dates) + logger: logging.Logger + The structured logger. 
Returns ------- @@ -164,7 +168,7 @@ def process(fname, sensors, metrics, geo_resolutions, usecols=used_cols, parse_dates=["date_range_start", "date_range_end"]) dfs = construct_signals(df, metric_names, naics_codes, brand_df) - print("Finished pulling data from " + fname) + logger.info("Finished pulling data.", filename=fname) else: files = glob.glob(f'{fname}/**/*.csv.gz', recursive=True) dfs_dict = {"bars_visit": [], "restaurants_visit": []} @@ -180,9 +184,11 @@ def process(fname, sensors, metrics, geo_resolutions, ).groupby(["timestamp", "zip"]).sum().reset_index() dfs["restaurants_visit"] = pd.concat(dfs_dict["restaurants_visit"] ).groupby(["timestamp", "zip"]).sum().reset_index() - print("Finished pulling data from " + fname) + logger.info("Finished pulling data.", filename=fname) for geo_res, sensor in product(geo_resolutions, sensors): for metric, wip in zip(metric_names, wips): + logger.info("Generating signal and exporting to CSV", + geo_res=geo_res, metric=metric, sensor=sensor) df_export = aggregate(dfs[metric], metric, geo_res) df_export["val"] = df_export["_".join([metric, sensor])] df_export["se"] = np.nan diff --git a/safegraph_patterns/delphi_safegraph_patterns/run.py b/safegraph_patterns/delphi_safegraph_patterns/run.py index ffb0e4eb7..6eb474b9b 100644 --- a/safegraph_patterns/delphi_safegraph_patterns/run.py +++ b/safegraph_patterns/delphi_safegraph_patterns/run.py @@ -101,7 +101,8 @@ def run_module(params): sensors=SENSORS, geo_resolutions=GEO_RESOLUTIONS, export_dir=export_dir, - stats=stats + stats=stats, + logger=logger, ) with mp.Pool(n_core) as pool: diff --git a/usafacts/delphi_usafacts/run.py b/usafacts/delphi_usafacts/run.py index 90c11e28a..4c659679a 100644 --- a/usafacts/delphi_usafacts/run.py +++ b/usafacts/delphi_usafacts/run.py @@ -98,7 +98,7 @@ def run_module(params: Dict[str, Dict[str, Any]]): METRICS, GEO_RESOLUTIONS, SENSORS, SMOOTHERS): if "cumulative" in sensor and "seven_day_average" in smoother: continue - logger.info("generating signal and exporting to CSV", + logger.info("Generating signal and exporting to CSV", geo_res = geo_res, metric = metric, sensor = sensor, From f2f55d3b216257f150fea5e9113a38db433d51f4 Mon Sep 17 00:00:00 2001 From: alexcoda Date: Sun, 19 Sep 2021 12:00:16 -0700 Subject: [PATCH 03/12] lint --- nchs_mortality/delphi_nchs_mortality/run.py | 2 +- quidel/delphi_quidel/pull.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nchs_mortality/delphi_nchs_mortality/run.py b/nchs_mortality/delphi_nchs_mortality/run.py index 1673e79c1..1cf3d36d5 100644 --- a/nchs_mortality/delphi_nchs_mortality/run.py +++ b/nchs_mortality/delphi_nchs_mortality/run.py @@ -110,7 +110,7 @@ def run_module(params: Dict[str, Any]): # - Uploads changed files to S3 # - Does not export any issues into receiving if "archive" in params: - arch_diffs(params, daily_arch_diff) + arch_diffs(params, daily_arch_diff, logger) elapsed_time_in_seconds = round(time.time() - start_time, 2) min_max_date = stats and min(s[0] for s in stats) diff --git a/quidel/delphi_quidel/pull.py b/quidel/delphi_quidel/pull.py index 3a0c6f25f..1643f9304 100644 --- a/quidel/delphi_quidel/pull.py +++ b/quidel/delphi_quidel/pull.py @@ -133,7 +133,7 @@ def get_from_email(column_names, start_dates, end_dates, mail_server, if not whether_in_range: continue - logger.info(f"Pulling data", test=test, date=search_date.date()) + logger.info("Pulling data", test=test, date=search_date.date()) toread = io.BytesIO() toread.write(att.payload) toread.seek(0) # reset the pointer From 
23c134ad77b3f0dcd44d5078a0d5521410f3fad9 Mon Sep 17 00:00:00 2001 From: alexcoda Date: Sun, 19 Sep 2021 12:04:01 -0700 Subject: [PATCH 04/12] lint --- combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py b/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py index bddd1833f..7fbaa2898 100755 --- a/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py +++ b/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py @@ -134,7 +134,8 @@ def get_updated_dates(signal, geo, date_range, issue_range=None, fetcher=covidca return unique_dates -def combine_usafacts_and_jhu(signal, geo, date_range, logger, issue_range=None, fetcher=covidcast.signal): +def combine_usafacts_and_jhu(signal, geo, date_range, logger, + issue_range=None, fetcher=covidcast.signal): """Add rows for PR from JHU signals to USA-FACTS signals. For hhs and nation, fetch the county `num` data so we can compute the proportions correctly From f5825512f8b98fb9b50eee2d7cffde1d94fbaf83 Mon Sep 17 00:00:00 2001 From: alexcoda Date: Sun, 19 Sep 2021 12:10:44 -0700 Subject: [PATCH 05/12] Fix missing logger in tests --- quidel/tests/test_pull.py | 5 ++++- quidel_covidtest/tests/test_pull.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/quidel/tests/test_pull.py b/quidel/tests/test_pull.py index 435624f7e..bf27b0bd6 100644 --- a/quidel/tests/test_pull.py +++ b/quidel/tests/test_pull.py @@ -1,3 +1,4 @@ +import logging from datetime import datetime import pandas as pd @@ -36,6 +37,8 @@ def test_fix_date(self): class TestingPullData: def test_pull_quidel_data(self): + logger = logging.Logger("test_logger") + dfs, _ = pull_quidel_data({ "static_file_dir": "../static", "input_cache_dir": "./cache", @@ -49,7 +52,7 @@ def test_pull_quidel_data(self): "sender": "", "wip_signal": [""], "test_mode": True - }) + }, logger) # For covid_ag df = dfs["covid_ag"] diff --git a/quidel_covidtest/tests/test_pull.py b/quidel_covidtest/tests/test_pull.py index 48bb48d14..acdae32fe 100644 --- a/quidel_covidtest/tests/test_pull.py +++ b/quidel_covidtest/tests/test_pull.py @@ -1,3 +1,4 @@ +import logging from datetime import datetime import pandas as pd @@ -36,6 +37,8 @@ def test_fix_date(self): class TestingPullData: def test_pull_quidel_covidtest(self): + logger = logging.Logger("test_logger") + df, _ = pull_quidel_covidtest({ "static_file_dir": "../static", "input_cache_dir": "./cache", @@ -50,7 +53,7 @@ def test_pull_quidel_covidtest(self): "bucket_name": "", "wip_signal": "", "test_mode": True - }) + }, logger) first_date = df["timestamp"].min().date() last_date = df["timestamp"].max().date() From bb3121d36ba9af18eac334d174ba03a3729db236 Mon Sep 17 00:00:00 2001 From: alexcoda Date: Sun, 19 Sep 2021 12:22:02 -0700 Subject: [PATCH 06/12] Fix missing logger in tests --- combo_cases_and_deaths/tests/test_run.py | 14 ++++++++------ quidel/tests/test_pull.py | 7 +++---- quidel_covidtest/tests/test_pull.py | 8 ++++---- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/combo_cases_and_deaths/tests/test_run.py b/combo_cases_and_deaths/tests/test_run.py index 8d03627d4..3432d22d1 100644 --- a/combo_cases_and_deaths/tests/test_run.py +++ b/combo_cases_and_deaths/tests/test_run.py @@ -1,4 +1,5 @@ """Tests for running combo cases and deaths indicator.""" +import logging from datetime import date from itertools import product import os @@ -17,6 +18,7 @@ COLUMN_MAPPING) from 
delphi_combo_cases_and_deaths.constants import METRICS, SMOOTH_TYPES, SENSORS +LOGGER = logging.Logger("test_logger") def test_issue_dates(): """The smoothed value for a particular date is computed from the raw @@ -98,7 +100,7 @@ def make_mock(geo): ("1 1", 4, 1 if geo in ["nation", "hhs"] else 2), ("0 0", 2, 0) ]: - df = combine_usafacts_and_jhu("", geo, date_range, fetcher=mock_covidcast_signal) + df = combine_usafacts_and_jhu("", geo, date_range, LOGGER, fetcher=mock_covidcast_signal) assert df.size == expected_size * len(COLUMN_MAPPING), f""" Wrong number of rows in combined data frame for the number of available signals. @@ -126,7 +128,7 @@ def test_multiple_issues(mock_covidcast_signal): }), None ] * 2 - result = combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), fetcher=mock_covidcast_signal) + result = combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal) pd.testing.assert_frame_equal( result, pd.DataFrame( @@ -186,7 +188,7 @@ def test_combine_usafacts_and_jhu_special_geos(mock_covidcast_signal): ] * 6 # each call to combine_usafacts_and_jhu makes (2 + 2 * len(unique_timestamps)) = 12 calls to the fetcher pd.testing.assert_frame_equal( - combine_usafacts_and_jhu("confirmed_incidence_num", "nation", date_range=(0, 1), fetcher=mock_covidcast_signal), + combine_usafacts_and_jhu("confirmed_incidence_num", "nation", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal), pd.DataFrame({"timestamp": [20200101], "geo_id": ["us"], "val": [50 + 100 + 200], @@ -194,7 +196,7 @@ def test_combine_usafacts_and_jhu_special_geos(mock_covidcast_signal): "sample_size": [None]}) ) pd.testing.assert_frame_equal( - combine_usafacts_and_jhu("confirmed_incidence_prop", "nation", date_range=(0, 1), fetcher=mock_covidcast_signal), + combine_usafacts_and_jhu("confirmed_incidence_prop", "nation", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal), pd.DataFrame({"timestamp": [20200101], "geo_id": ["us"], "val": [(50 + 100 + 200) / (4903185 + 3723066) * 100000], @@ -202,7 +204,7 @@ def test_combine_usafacts_and_jhu_special_geos(mock_covidcast_signal): "sample_size": [None]}) ) pd.testing.assert_frame_equal( - combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), fetcher=mock_covidcast_signal), + combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal), pd.DataFrame({"geo_id": ["01000", "01001", "72001"], "val": [50, 100, 200], "timestamp": [20200101, 20200101, 20200101]}, @@ -229,7 +231,7 @@ def test_no_nation_jhu(mock_covidcast_signal): "value": [1], "timestamp": [20200101]}) ] - result = combine_usafacts_and_jhu("_num", "nation", date_range=(0, 1), fetcher=mock_covidcast_signal) + result = combine_usafacts_and_jhu("_num", "nation", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal) assert mock_covidcast_signal.call_args_list[-1] == call( "jhu-csse", diff --git a/quidel/tests/test_pull.py b/quidel/tests/test_pull.py index bf27b0bd6..1f36fb85e 100644 --- a/quidel/tests/test_pull.py +++ b/quidel/tests/test_pull.py @@ -14,6 +14,7 @@ END_FROM_TODAY_MINUS = 5 EXPORT_DAY_RANGE = 40 +LOGGER = logging.Logger("test_logger") class TestFixData: def test_fix_zipcode(self): @@ -29,7 +30,7 @@ def test_fix_date(self): datetime(2020, 6, 14), datetime(2020, 7, 10)], "TestDate":[datetime(2020, 1, 19), datetime(2020, 6, 10), datetime(2020, 6, 11), datetime(2020, 7, 2)]}) - df = fix_date(df) + df 
= fix_date(df, LOGGER) assert set(df["timestamp"]) == set([datetime(2020, 5, 19), datetime(2020, 6, 11), datetime(2020, 7, 2)]) @@ -37,8 +38,6 @@ def test_fix_date(self): class TestingPullData: def test_pull_quidel_data(self): - logger = logging.Logger("test_logger") - dfs, _ = pull_quidel_data({ "static_file_dir": "../static", "input_cache_dir": "./cache", @@ -52,7 +51,7 @@ def test_pull_quidel_data(self): "sender": "", "wip_signal": [""], "test_mode": True - }, logger) + }, LOGGER) # For covid_ag df = dfs["covid_ag"] diff --git a/quidel_covidtest/tests/test_pull.py b/quidel_covidtest/tests/test_pull.py index acdae32fe..01df4c7c6 100644 --- a/quidel_covidtest/tests/test_pull.py +++ b/quidel_covidtest/tests/test_pull.py @@ -15,6 +15,8 @@ END_FROM_TODAY_MINUS = 5 EXPORT_DAY_RANGE = 40 +LOGGER = logging.Logger("test_logger") + class TestFixData: def test_fix_zipcode(self): @@ -29,7 +31,7 @@ def test_fix_date(self): datetime(2020, 6, 14), datetime(2020, 7, 10)], "TestDate":[datetime(2020, 1, 19), datetime(2020, 6, 10), datetime(2020, 6, 11), datetime(2020, 7, 2)]}) - df = fix_date(df) + df = fix_date(df, LOGGER) assert set(df["timestamp"]) == set([datetime(2020, 5, 19), datetime(2020, 6, 11), datetime(2020, 7, 2)]) @@ -37,8 +39,6 @@ def test_fix_date(self): class TestingPullData: def test_pull_quidel_covidtest(self): - logger = logging.Logger("test_logger") - df, _ = pull_quidel_covidtest({ "static_file_dir": "../static", "input_cache_dir": "./cache", @@ -53,7 +53,7 @@ def test_pull_quidel_covidtest(self): "bucket_name": "", "wip_signal": "", "test_mode": True - }, logger) + }, LOGGER) first_date = df["timestamp"].min().date() last_date = df["timestamp"].max().date() From 39e7dff42c94d6c4cc9eb14413e93933f3a6d9ae Mon Sep 17 00:00:00 2001 From: alexcoda Date: Sun, 19 Sep 2021 12:33:18 -0700 Subject: [PATCH 07/12] Instantiate logger correctly in tests --- combo_cases_and_deaths/tests/test_run.py | 14 +++++++------- quidel/tests/test_pull.py | 6 +++--- quidel_covidtest/tests/test_pull.py | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/combo_cases_and_deaths/tests/test_run.py b/combo_cases_and_deaths/tests/test_run.py index 3432d22d1..e83af9abb 100644 --- a/combo_cases_and_deaths/tests/test_run.py +++ b/combo_cases_and_deaths/tests/test_run.py @@ -18,7 +18,7 @@ COLUMN_MAPPING) from delphi_combo_cases_and_deaths.constants import METRICS, SMOOTH_TYPES, SENSORS -LOGGER = logging.Logger("test_logger") +TEST_LOGGER = logging.getLogger() def test_issue_dates(): """The smoothed value for a particular date is computed from the raw @@ -100,7 +100,7 @@ def make_mock(geo): ("1 1", 4, 1 if geo in ["nation", "hhs"] else 2), ("0 0", 2, 0) ]: - df = combine_usafacts_and_jhu("", geo, date_range, LOGGER, fetcher=mock_covidcast_signal) + df = combine_usafacts_and_jhu("", geo, date_range, TEST_LOGGER, fetcher=mock_covidcast_signal) assert df.size == expected_size * len(COLUMN_MAPPING), f""" Wrong number of rows in combined data frame for the number of available signals. 
@@ -128,7 +128,7 @@ def test_multiple_issues(mock_covidcast_signal): }), None ] * 2 - result = combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal) + result = combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), logger=TEST_LOGGER, fetcher=mock_covidcast_signal) pd.testing.assert_frame_equal( result, pd.DataFrame( @@ -188,7 +188,7 @@ def test_combine_usafacts_and_jhu_special_geos(mock_covidcast_signal): ] * 6 # each call to combine_usafacts_and_jhu makes (2 + 2 * len(unique_timestamps)) = 12 calls to the fetcher pd.testing.assert_frame_equal( - combine_usafacts_and_jhu("confirmed_incidence_num", "nation", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal), + combine_usafacts_and_jhu("confirmed_incidence_num", "nation", date_range=(0, 1), logger=TEST_LOGGER, fetcher=mock_covidcast_signal), pd.DataFrame({"timestamp": [20200101], "geo_id": ["us"], "val": [50 + 100 + 200], @@ -196,7 +196,7 @@ def test_combine_usafacts_and_jhu_special_geos(mock_covidcast_signal): "sample_size": [None]}) ) pd.testing.assert_frame_equal( - combine_usafacts_and_jhu("confirmed_incidence_prop", "nation", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal), + combine_usafacts_and_jhu("confirmed_incidence_prop", "nation", date_range=(0, 1), logger=TEST_LOGGER, fetcher=mock_covidcast_signal), pd.DataFrame({"timestamp": [20200101], "geo_id": ["us"], "val": [(50 + 100 + 200) / (4903185 + 3723066) * 100000], @@ -204,7 +204,7 @@ def test_combine_usafacts_and_jhu_special_geos(mock_covidcast_signal): "sample_size": [None]}) ) pd.testing.assert_frame_equal( - combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal), + combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), logger=TEST_LOGGER, fetcher=mock_covidcast_signal), pd.DataFrame({"geo_id": ["01000", "01001", "72001"], "val": [50, 100, 200], "timestamp": [20200101, 20200101, 20200101]}, @@ -231,7 +231,7 @@ def test_no_nation_jhu(mock_covidcast_signal): "value": [1], "timestamp": [20200101]}) ] - result = combine_usafacts_and_jhu("_num", "nation", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal) + result = combine_usafacts_and_jhu("_num", "nation", date_range=(0, 1), logger=TEST_LOGGER, fetcher=mock_covidcast_signal) assert mock_covidcast_signal.call_args_list[-1] == call( "jhu-csse", diff --git a/quidel/tests/test_pull.py b/quidel/tests/test_pull.py index 1f36fb85e..17f596ce9 100644 --- a/quidel/tests/test_pull.py +++ b/quidel/tests/test_pull.py @@ -14,7 +14,7 @@ END_FROM_TODAY_MINUS = 5 EXPORT_DAY_RANGE = 40 -LOGGER = logging.Logger("test_logger") +TEST_LOGGER = logging.getLogger() class TestFixData: def test_fix_zipcode(self): @@ -30,7 +30,7 @@ def test_fix_date(self): datetime(2020, 6, 14), datetime(2020, 7, 10)], "TestDate":[datetime(2020, 1, 19), datetime(2020, 6, 10), datetime(2020, 6, 11), datetime(2020, 7, 2)]}) - df = fix_date(df, LOGGER) + df = fix_date(df, TEST_LOGGER) assert set(df["timestamp"]) == set([datetime(2020, 5, 19), datetime(2020, 6, 11), datetime(2020, 7, 2)]) @@ -51,7 +51,7 @@ def test_pull_quidel_data(self): "sender": "", "wip_signal": [""], "test_mode": True - }, LOGGER) + }, TEST_LOGGER) # For covid_ag df = dfs["covid_ag"] diff --git a/quidel_covidtest/tests/test_pull.py b/quidel_covidtest/tests/test_pull.py index 01df4c7c6..17ddbb6fd 100644 --- a/quidel_covidtest/tests/test_pull.py +++ 
b/quidel_covidtest/tests/test_pull.py @@ -15,7 +15,7 @@ END_FROM_TODAY_MINUS = 5 EXPORT_DAY_RANGE = 40 -LOGGER = logging.Logger("test_logger") +TEST_LOGGER = logging.getLogger() class TestFixData: def test_fix_zipcode(self): @@ -31,7 +31,7 @@ def test_fix_date(self): datetime(2020, 6, 14), datetime(2020, 7, 10)], "TestDate":[datetime(2020, 1, 19), datetime(2020, 6, 10), datetime(2020, 6, 11), datetime(2020, 7, 2)]}) - df = fix_date(df, LOGGER) + df = fix_date(df, TEST_LOGGER) assert set(df["timestamp"]) == set([datetime(2020, 5, 19), datetime(2020, 6, 11), datetime(2020, 7, 2)]) @@ -53,7 +53,7 @@ def test_pull_quidel_covidtest(self): "bucket_name": "", "wip_signal": "", "test_mode": True - }, LOGGER) + }, TEST_LOGGER) first_date = df["timestamp"].min().date() last_date = df["timestamp"].max().date() From af7a90ebed462e9a3653325d4ffea7d85396e8cd Mon Sep 17 00:00:00 2001 From: alexcoda Date: Sun, 19 Sep 2021 12:38:08 -0700 Subject: [PATCH 08/12] Fix error check --- quidel/delphi_quidel/data_tools.py | 4 ++-- quidel_covidtest/delphi_quidel_covidtest/data_tools.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/quidel/delphi_quidel/data_tools.py b/quidel/delphi_quidel/data_tools.py index c0ebeb750..92afb2159 100644 --- a/quidel/delphi_quidel/data_tools.py +++ b/quidel/delphi_quidel/data_tools.py @@ -287,9 +287,9 @@ def raw_tests_per_device(devices, tests, min_obs): """ devices = devices.astype(float) tests = tests.astype(float) - if np.any(np.isnan(devices)) or np.any(d < 0 for d in devices): + if np.any(np.isnan(devices)) or np.any(devices < 0): raise ValueError("devices should be non-negative with no np.nan") - if np.any(np.isnan(tests)) or np.any(d < 0 for d in tests): + if np.any(np.isnan(tests)) or np.any(tests < 0): raise ValueError("tests should be non-negative with no np.nan") if min_obs <= 0: raise ValueError('min_obs should be positive') diff --git a/quidel_covidtest/delphi_quidel_covidtest/data_tools.py b/quidel_covidtest/delphi_quidel_covidtest/data_tools.py index fac0b58b2..f89a353ed 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/data_tools.py +++ b/quidel_covidtest/delphi_quidel_covidtest/data_tools.py @@ -293,9 +293,9 @@ def raw_tests_per_device(devices, tests, min_obs): """ devices = devices.astype(float) tests = tests.astype(float) - if np.any(np.isnan(devices)) or np.any(d < 0 for d in devices): + if np.any(np.isnan(devices)) or np.any(devices < 0): raise ValueError("devices should be non-negative with no np.nan") - if np.any(np.isnan(tests)) or np.any(d < 0 for d in tests): + if np.any(np.isnan(tests)) or np.any(tests < 0): raise ValueError("tests should be non-negative with no np.nan") if min_obs <= 0: raise ValueError('min_obs should be positive') From 5f5b292e2c661198fb2a77f2504e194cf884801f Mon Sep 17 00:00:00 2001 From: Alex Coda Date: Tue, 21 Sep 2021 17:34:46 -0700 Subject: [PATCH 09/12] Update quidel/delphi_quidel/pull.py Co-authored-by: Katie Mazaitis --- quidel/delphi_quidel/pull.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quidel/delphi_quidel/pull.py b/quidel/delphi_quidel/pull.py index 1643f9304..ffbba3283 100644 --- a/quidel/delphi_quidel/pull.py +++ b/quidel/delphi_quidel/pull.py @@ -176,7 +176,7 @@ def fix_date(df, logger): df.insert(2, "timestamp", df["TestDate"]) mask = df["TestDate"] <= df["StorageDate"] - logger.info(f"Removing {((len(df) - np.sum(mask)) * 100 / len(df)):.2f} of unusual data") + logger.info(f"Removing {((len(df) - np.sum(mask)) * 100 / len(df)):.2f}% of unusual data") df = df[mask] 
mask = df["StorageDate"] - df["TestDate"] > pd.Timedelta(days=90) From 18ae0729b9450e9159fef5faaeea9152ff28d98e Mon Sep 17 00:00:00 2001 From: Alex Coda Date: Tue, 21 Sep 2021 17:34:50 -0700 Subject: [PATCH 10/12] Update quidel/delphi_quidel/pull.py Co-authored-by: Katie Mazaitis --- quidel/delphi_quidel/pull.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quidel/delphi_quidel/pull.py b/quidel/delphi_quidel/pull.py index ffbba3283..f168b3355 100644 --- a/quidel/delphi_quidel/pull.py +++ b/quidel/delphi_quidel/pull.py @@ -180,7 +180,7 @@ def fix_date(df, logger): df = df[mask] mask = df["StorageDate"] - df["TestDate"] > pd.Timedelta(days=90) - logger.info(f"Fixing {(np.sum(mask) * 100 / len(df)):.2f} of outdated data") + logger.info(f"Fixing {(np.sum(mask) * 100 / len(df)):.2f}% of outdated data") df["timestamp"].values[mask] = df["StorageDate"].values[mask] return df From 1aa181c3151baf5d431f1c3a772634f7a37311f4 Mon Sep 17 00:00:00 2001 From: Alex Coda Date: Tue, 21 Sep 2021 17:34:57 -0700 Subject: [PATCH 11/12] Update quidel_covidtest/delphi_quidel_covidtest/pull.py Co-authored-by: Katie Mazaitis --- quidel_covidtest/delphi_quidel_covidtest/pull.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quidel_covidtest/delphi_quidel_covidtest/pull.py b/quidel_covidtest/delphi_quidel_covidtest/pull.py index 9ce036e10..b5f9eb9d0 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/pull.py +++ b/quidel_covidtest/delphi_quidel_covidtest/pull.py @@ -99,7 +99,7 @@ def fix_date(df, logger): df.insert(2, "timestamp", df["TestDate"]) mask = df["TestDate"] <= df["StorageDate"] - logger.info(f"Removing {((len(df) - np.sum(mask)) * 100 / len(df)):.2f} of unusual data") + logger.info(f"Removing {((len(df) - np.sum(mask)) * 100 / len(df)):.2f}% of unusual data") df = df[mask] mask = df["StorageDate"] - df["TestDate"] > pd.Timedelta(days=90) From b6361dec6ff234a170974ffe59543b3ed71e181e Mon Sep 17 00:00:00 2001 From: Alex Coda Date: Tue, 21 Sep 2021 17:35:03 -0700 Subject: [PATCH 12/12] Update quidel_covidtest/delphi_quidel_covidtest/pull.py Co-authored-by: Katie Mazaitis --- quidel_covidtest/delphi_quidel_covidtest/pull.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quidel_covidtest/delphi_quidel_covidtest/pull.py b/quidel_covidtest/delphi_quidel_covidtest/pull.py index b5f9eb9d0..3efa9ed23 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/pull.py +++ b/quidel_covidtest/delphi_quidel_covidtest/pull.py @@ -103,7 +103,7 @@ def fix_date(df, logger): df = df[mask] mask = df["StorageDate"] - df["TestDate"] > pd.Timedelta(days=90) - logger.info(f"Fixing {(np.sum(mask) * 100 / len(df)):.2f} of outdated data") + logger.info(f"Fixing {(np.sum(mask) * 100 / len(df)):.2f}% of outdated data") df["timestamp"].values[mask] = df["StorageDate"].values[mask] return df