From efd9423e9e550e8bda38c0219c4670b4a43d11a5 Mon Sep 17 00:00:00 2001 From: alexcoda Date: Sun, 19 Sep 2021 11:35:05 -0700 Subject: [PATCH 01/12] Fix value check in quidel data_tools --- quidel/delphi_quidel/data_tools.py | 9 ++++----- quidel_covidtest/delphi_quidel_covidtest/data_tools.py | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/quidel/delphi_quidel/data_tools.py b/quidel/delphi_quidel/data_tools.py index 5d67dd812..6f9cb45c9 100644 --- a/quidel/delphi_quidel/data_tools.py +++ b/quidel/delphi_quidel/data_tools.py @@ -290,11 +290,10 @@ def raw_tests_per_device(devices, tests, min_obs): """ devices = devices.astype(float) tests = tests.astype(float) - if (np.any(np.isnan(devices)) or np.any(np.isnan(tests))): - print(devices) - print(tests) - raise ValueError('devices and tests should be non-negative ' - 'with no np.nan') + if np.any(np.isnan(devices)) or np.any(d < 0 for d in devices): + raise ValueError("devices should be non-negative with no np.nan") + if np.any(np.isnan(tests)) or np.any(d < 0 for d in tests): + raise ValueError("tests should be non-negative with no np.nan") if min_obs <= 0: raise ValueError('min_obs should be positive') tests[tests < min_obs] = np.nan diff --git a/quidel_covidtest/delphi_quidel_covidtest/data_tools.py b/quidel_covidtest/delphi_quidel_covidtest/data_tools.py index 18898ec8e..54995dc90 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/data_tools.py +++ b/quidel_covidtest/delphi_quidel_covidtest/data_tools.py @@ -296,11 +296,10 @@ def raw_tests_per_device(devices, tests, min_obs): """ devices = devices.astype(float) tests = tests.astype(float) - if (np.any(np.isnan(devices)) or np.any(np.isnan(tests))): - print(devices) - print(tests) - raise ValueError('devices and tests should be non-negative ' - 'with no np.nan') + if np.any(np.isnan(devices)) or np.any(d < 0 for d in devices): + raise ValueError("devices should be non-negative with no np.nan") + if np.any(np.isnan(tests)) or np.any(d < 0 for d in tests): + raise ValueError("tests should be non-negative with no np.nan") if min_obs <= 0: raise ValueError('min_obs should be positive') tests[tests < min_obs] = np.nan From 77f26f7fefb05e4ed4fb536e2016a15639c2f97c Mon Sep 17 00:00:00 2001 From: alexcoda Date: Sun, 19 Sep 2021 11:38:24 -0700 Subject: [PATCH 02/12] Replace print statements with logging And add log statements for data export variants --- .../delphi_combo_cases_and_deaths/run.py | 19 +++++------ covid_act_now/delphi_covid_act_now/run.py | 7 ++-- hhs_facilities/delphi_hhs_facilities/run.py | 3 ++ hhs_hosp/delphi_hhs/run.py | 4 +++ .../delphi_nchs_mortality/archive_diffs.py | 8 +++-- nchs_mortality/delphi_nchs_mortality/run.py | 7 ++-- quidel/delphi_quidel/data_tools.py | 3 -- quidel/delphi_quidel/pull.py | 34 +++++++++---------- quidel/delphi_quidel/run.py | 9 ++--- .../delphi_quidel_covidtest/data_tools.py | 3 -- .../delphi_quidel_covidtest/pull.py | 31 +++++++++-------- .../delphi_quidel_covidtest/run.py | 12 ++++--- .../delphi_safegraph_patterns/process.py | 16 ++++++--- .../delphi_safegraph_patterns/run.py | 3 +- usafacts/delphi_usafacts/run.py | 2 +- 15 files changed, 90 insertions(+), 71 deletions(-) diff --git a/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py b/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py index c54f3f3be..bddd1833f 100755 --- a/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py +++ b/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py @@ -33,14 +33,6 @@ covidcast.covidcast._ASYNC_CALL = 
True # pylint: disable=protected-access -def check_none_data_frame(data_frame, label, date_range): - """Log and return True when a data frame is None.""" - if data_frame is None: - print(f"{label} completely unavailable in range {date_range}") - return True - return False - - def maybe_append(usa_facts, jhu): """ Append dataframes if available, otherwise return USAFacts. @@ -133,7 +125,7 @@ def get_updated_dates(signal, geo, date_range, issue_range=None, fetcher=covidca issues=issue_range ) - if check_none_data_frame(usafacts_df, "USA-FACTS", date_range): + if usafacts_df is None: return None merged_df = merge_dfs_by_geos(usafacts_df, jhu_df, geo) @@ -142,7 +134,7 @@ def get_updated_dates(signal, geo, date_range, issue_range=None, fetcher=covidca return unique_dates -def combine_usafacts_and_jhu(signal, geo, date_range, issue_range=None, fetcher=covidcast.signal): +def combine_usafacts_and_jhu(signal, geo, date_range, logger, issue_range=None, fetcher=covidcast.signal): """Add rows for PR from JHU signals to USA-FACTS signals. For hhs and nation, fetch the county `num` data so we can compute the proportions correctly @@ -158,6 +150,7 @@ def combine_usafacts_and_jhu(signal, geo, date_range, issue_range=None, fetcher= # This occurs if the usafacts ~and the jhu query were empty if unique_dates is None: + logger.info("USA-FACTS completely unavailable for dates", date_range=date_range) return EMPTY_FRAME # Query only the represented window so that every geo is represented; a single window call is @@ -329,9 +322,15 @@ def run_module(params): log_exceptions=params["common"].get("log_exceptions", True)) for metric, geo_res, sensor_name, signal in variants: + logger.info("Generating signal and exporting to CSV", + geo_res = geo_res, + metric = metric, + sensor = sensor_name, + signal = signal) df = combine_usafacts_and_jhu(signal, geo_res, extend_raw_date_range(params, sensor_name), + logger, params['indicator']['issue_range']) df["timestamp"] = pd.to_datetime(df["timestamp"]) start_date = pd.to_datetime(params['indicator']['export_start_date']) diff --git a/covid_act_now/delphi_covid_act_now/run.py b/covid_act_now/delphi_covid_act_now/run.py index d9d983f0d..7cc96f6e4 100644 --- a/covid_act_now/delphi_covid_act_now/run.py +++ b/covid_act_now/delphi_covid_act_now/run.py @@ -45,7 +45,7 @@ def run_module(params): parquet_url = params["indicator"]["parquet_url"] # Load CAN county-level testing data - print("Pulling CAN data") + logger.info("Pulling CAN data") df_pq = load_data(parquet_url) df_county_testing = extract_testing_metrics(df_pq) @@ -54,7 +54,8 @@ def run_module(params): max_dates_exported = [] # Perform geo aggregations and export to receiving for geo_res in GEO_RESOLUTIONS: - print(f"Processing {geo_res}") + logger.info("Generating signal and exporting to CSV", + geo_res = geo_res) df = geo_map(df_county_testing, geo_res) # Export 'pcr_specimen_positivity_rate' @@ -79,7 +80,7 @@ def run_module(params): max_dates_exported.append(latest) # x2 to count both positivity and tests signals num_exported_files += exported_csv_dates.size * 2 - print(f"Exported dates: {earliest} to {latest}") + logger.info("Exported for dates between", earliest=earliest, latest=latest) elapsed_time_in_seconds = round(time.time() - start_time, 2) max_lag_in_days = (datetime.now() - min(max_dates_exported)).days diff --git a/hhs_facilities/delphi_hhs_facilities/run.py b/hhs_facilities/delphi_hhs_facilities/run.py index b41df5bcc..43d3a9bdd 100644 --- a/hhs_facilities/delphi_hhs_facilities/run.py +++ 
b/hhs_facilities/delphi_hhs_facilities/run.py @@ -36,6 +36,9 @@ def run_module(params) -> None: filled_fips_df = fill_missing_fips(raw_df, gmpr) stats = [] for geo, (sig_name, sig_cols, sig_func, sig_offset) in product(GEO_RESOLUTIONS, SIGNALS): + logger.info("Generating signal and exporting to CSV", + geo_res = geo, + signal_name = sig_name) mapped_df = convert_geo(filled_fips_df, geo, gmpr) output_df = generate_signal(mapped_df, sig_cols, sig_func, sig_offset) dates = create_export_csv(output_df, params["common"]["export_dir"], geo, sig_name) diff --git a/hhs_hosp/delphi_hhs/run.py b/hhs_hosp/delphi_hhs/run.py index 6af654845..45c2f5bc1 100644 --- a/hhs_hosp/delphi_hhs/run.py +++ b/hhs_hosp/delphi_hhs/run.py @@ -105,6 +105,10 @@ def run_module(params): geo_mapper = GeoMapper() stats = [] for sensor, smoother, geo in product(SIGNALS, SMOOTHERS, GEOS): + logger.info("Generating signal and exporting to CSV", + geo_res = geo, + sensor = sensor, + smoother = smoother) df = geo_mapper.add_geocode(make_signal(all_columns, sensor), "state_id", "state_code", diff --git a/nchs_mortality/delphi_nchs_mortality/archive_diffs.py b/nchs_mortality/delphi_nchs_mortality/archive_diffs.py index 6524203b3..e8b790cee 100644 --- a/nchs_mortality/delphi_nchs_mortality/archive_diffs.py +++ b/nchs_mortality/delphi_nchs_mortality/archive_diffs.py @@ -8,7 +8,7 @@ from delphi_utils import S3ArchiveDiffer -def arch_diffs(params, daily_arch_diff): +def arch_diffs(params, daily_arch_diff, logger): """ Archive differences between new updates and existing data. @@ -23,6 +23,8 @@ Read from params.json daily_arch_diff: S3ArchiveDiffer Used to store and update cache + logger: logging.Logger + The structured logger. """ weekly_export_dir = params["common"]["weekly_export_dir"] daily_export_dir = params["common"]["daily_export_dir"] @@ -59,7 +61,7 @@ # Report failures: someone should probably look at them for exported_file in fails: - print(f"Failed to archive (weekly) '{exported_file}'") + logger.info("Failed to archive (weekly)", filename=exported_file) # Daily run of archiving utility # - Uploads changed files to S3 @@ -83,4 +85,4 @@ # Report failures: someone should probably look at them for exported_file in fails: - print(f"Failed to archive (daily) '{exported_file}'") + logger.info("Failed to archive (daily)", filename=exported_file) diff --git a/nchs_mortality/delphi_nchs_mortality/run.py b/nchs_mortality/delphi_nchs_mortality/run.py index fa0226fcb..1673e79c1 100644 --- a/nchs_mortality/delphi_nchs_mortality/run.py +++ b/nchs_mortality/delphi_nchs_mortality/run.py @@ -62,7 +62,8 @@ def run_module(params: Dict[str, Any]): df_pull = pull_nchs_mortality_data(token, test_file) for metric in METRICS: if metric == 'percent_of_expected_deaths': - print(metric) + logger.info("Generating signal and exporting to CSV", + metric = metric) df = df_pull.copy() df["val"] = df[metric] df["se"] = np.nan @@ -80,7 +81,9 @@ else: for sensor in SENSORS: - print(metric, sensor) + logger.info("Generating signal and exporting to CSV", + metric = metric, + sensor = sensor) df = df_pull.copy() if sensor == "num": df["val"] = df[metric] diff --git a/quidel/delphi_quidel/data_tools.py b/quidel/delphi_quidel/data_tools.py index 6f9cb45c9..c0ebeb750 100644 --- a/quidel/delphi_quidel/data_tools.py +++ b/quidel/delphi_quidel/data_tools.py @@ 
-86,8 +86,6 @@ def _geographical_pooling(tpooled_tests, tpooled_ptests, min_obs, max_borrow_obs Same length as tests; proportion of parent observations to borrow. """ if (np.any(np.isnan(tpooled_tests)) or np.any(np.isnan(tpooled_ptests))): - print(tpooled_tests) - print(tpooled_ptests) raise ValueError('[parent] tests should be non-negative ' 'with no np.nan') if max_borrow_obs > min_obs: @@ -153,7 +151,6 @@ def raw_positive_prop(positives, tests, min_obs): positives = positives.astype(float) tests = tests.astype(float) if np.any(np.isnan(positives)) or np.any(np.isnan(tests)): - print(positives, tests) raise ValueError('positives and tests should be non-negative ' 'with no np.nan') if np.any(positives > tests): diff --git a/quidel/delphi_quidel/pull.py b/quidel/delphi_quidel/pull.py index 6132ffe40..3a0c6f25f 100644 --- a/quidel/delphi_quidel/pull.py +++ b/quidel/delphi_quidel/pull.py @@ -82,7 +82,7 @@ def regulate_column_names(df, test_type): return df def get_from_email(column_names, start_dates, end_dates, mail_server, - account, sender, password): + account, sender, password, logger): """ Get raw data from email account. @@ -98,6 +98,8 @@ def get_from_email(column_names, start_dates, end_dates, mail_server, email account of the sender password: str password of the datadrop email + logger: logging.Logger + The structured logger. Returns: df: pd.DataFrame @@ -131,7 +133,7 @@ def get_from_email(column_names, start_dates, end_dates, mail_server, if not whether_in_range: continue - print(f"Pulling {test} data received on %s"%search_date.date()) + logger.info(f"Pulling data", test=test, date=search_date.date()) toread = io.BytesIO() toread.write(att.payload) toread.seek(0) # reset the pointer @@ -153,10 +155,9 @@ def fix_zipcode(df): zipcode = int(float(zipcode)) zipcode5.append(zipcode) df['zip'] = zipcode5 - # print('Fixing %.2f %% of the data' % (fixnum * 100 / len(zipcode5))) return df -def fix_date(df): +def fix_date(df, logger): """ Remove invalid dates and select correct test date to use. @@ -175,16 +176,16 @@ def fix_date(df): df.insert(2, "timestamp", df["TestDate"]) mask = df["TestDate"] <= df["StorageDate"] - print("Removing %.2f%% of unusual data" % ((len(df) - np.sum(mask)) * 100 / len(df))) + logger.info(f"Removing {((len(df) - np.sum(mask)) * 100 / len(df)):.2f} of unusual data") df = df[mask] mask = df["StorageDate"] - df["TestDate"] > pd.Timedelta(days=90) - print("Fixing %.2f%% of outdated data" % (np.sum(mask) * 100 / len(df))) + logger.info(f"Fixing {(np.sum(mask) * 100 / len(df)):.2f} of outdated data") df["timestamp"].values[mask] = df["StorageDate"].values[mask] return df def preprocess_new_data(start_dates, end_dates, mail_server, account, - sender, password, test_mode): + sender, password, test_mode, logger): """ Pull and pre-process Quidel Antigen Test data from datadrop email. @@ -206,6 +207,8 @@ def preprocess_new_data(start_dates, end_dates, mail_server, account, password of the datadrop email test_mode: bool pull raw data from email or not + logger: logging.Logger + The structured logger. 
Returns: df: pd.DataFrame time_flag: datetime.date: @@ -220,7 +223,7 @@ else: # Get new data from email dfs, time_flag = get_from_email(COLUMN_NAMES, start_dates, end_dates, - mail_server, account, sender, password) + mail_server, account, sender, password, logger) # No new data can be pulled if time_flag is None: @@ -228,13 +231,12 @@ df_finals = {} for test_type in TEST_TYPES: - print(f"For {test_type}:") + logger.info("Processing data", test_type=test_type) df = dfs[test_type] # Fix some of the fipcodes that are 9 digit instead of 5 digit df = fix_zipcode(df) # Create a column CanonicalDate according to StarageDate and TestDate - df = fix_date(df) - + df = fix_date(df, logger) # Compute numUniqueDevices numUniqueDevices = df.groupby( by=["timestamp", "zip"], @@ -309,17 +311,15 @@ def check_intermediate_file(cache_dir, pull_start_dates): sep=",", parse_dates=["timestamp"]) return previous_dfs, pull_start_dates -def pull_quidel_data(params): +def pull_quidel_data(params, logger): """ Pull new quidel test data and decide whether to combine it with historical records in ./cache. Parameters: params: dict including all the information read from params.json + logger: logging.Logger + The structured logger. Returns: DataFrame: @@ -355,7 +355,7 @@ # Use _end_date to check the most recent date that we received data dfs, _end_date = preprocess_new_data( pull_start_dates, pull_end_dates, mail_server, - account, sender, password, test_mode) + account, sender, password, test_mode, logger) # Utilize previously stored data for test_type in TEST_TYPES: diff --git a/quidel/delphi_quidel/run.py b/quidel/delphi_quidel/run.py index 49f6ec66b..cd83d746a 100644 --- a/quidel/delphi_quidel/run.py +++ b/quidel/delphi_quidel/run.py @@ -63,9 +63,9 @@ def run_module(params: Dict[str, Any]): ) # Pull data and update export date - dfs, _end_date = pull_quidel_data(params["indicator"]) + dfs, _end_date = pull_quidel_data(params["indicator"], logger) if _end_date is None: - print("The data is up-to-date. Currently, no new data to be ingested.") + logger.info("The data is up-to-date. 
Currently, no new data to be ingested.") return export_end_dates = check_export_end_date(export_end_dates, _end_date, END_FROM_TODAY_MINUS) @@ -81,7 +81,6 @@ def run_module(params: Dict[str, Any]): for sensor in sensors: # Check either covid_ag or flu_ag test_type = "covid_ag" if "covid_ag" in sensor else "flu_ag" - print("state", sensor) data = dfs[test_type].copy() state_groups = geo_map("state", data, map_df).groupby("state_id") first_date, last_date = data["timestamp"].min(), data["timestamp"].max() @@ -97,7 +96,9 @@ def run_module(params: Dict[str, Any]): # County/HRR/MSA level for geo_res in GEO_RESOLUTIONS: - print(geo_res, sensor) + logger.info("Generating signal and exporting to CSV", + geo_res = geo_res, + sensor = sensor) data = dfs[test_type].copy() data, res_key = geo_map(geo_res, data, map_df) res_df = generate_sensor_for_other_geores( diff --git a/quidel_covidtest/delphi_quidel_covidtest/data_tools.py b/quidel_covidtest/delphi_quidel_covidtest/data_tools.py index 54995dc90..fac0b58b2 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/data_tools.py +++ b/quidel_covidtest/delphi_quidel_covidtest/data_tools.py @@ -92,8 +92,6 @@ def _geographical_pooling(tpooled_tests, tpooled_ptests, min_obs): """ if (np.any(np.isnan(tpooled_tests)) or np.any(np.isnan(tpooled_ptests))): - print(tpooled_tests) - print(tpooled_ptests) raise ValueError('[parent] tests should be non-negative ' 'with no np.nan') # STEP 1: "TOP UP" USING PARENT LOCATION @@ -156,7 +154,6 @@ def raw_positive_prop(positives, tests, min_obs): positives = positives.astype(float) tests = tests.astype(float) if np.any(np.isnan(positives)) or np.any(np.isnan(tests)): - print(positives, tests) raise ValueError('positives and tests should be non-negative ' 'with no np.nan') if np.any(positives > tests): diff --git a/quidel_covidtest/delphi_quidel_covidtest/pull.py b/quidel_covidtest/delphi_quidel_covidtest/pull.py index fe042ed38..9ce036e10 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/pull.py +++ b/quidel_covidtest/delphi_quidel_covidtest/pull.py @@ -8,7 +8,7 @@ import pandas as pd import numpy as np -def get_from_s3(start_date, end_date, bucket): +def get_from_s3(start_date, end_date, bucket, logger): """ Get raw data from aws s3 bucket. @@ -19,6 +19,8 @@ def get_from_s3(start_date, end_date, bucket): pull data from file tagged with date on/before the end date bucket: s3.Bucket the aws s3 bucket that stores quidel data + logger: logging.Logger + The structured logger. output: df: pd.DataFrame time_flag: datetime.datetime @@ -49,7 +51,7 @@ def get_from_s3(start_date, end_date, bucket): for search_date in [start_date + timedelta(days=x) for x in range(n_days)]: if search_date in s3_files.keys(): # Avoid appending duplicate datasets - print("Pulling data received on %s"%search_date.date()) + logger.info(f"Pulling data received on {search_date.date()}") # Fetch data received on the same day for fn in s3_files[search_date]: @@ -76,10 +78,9 @@ def fix_zipcode(df): zipcode = int(float(zipcode)) zipcode5.append(zipcode) df['zip'] = zipcode5 - # print('Fixing %.2f %% of the data' % (fixnum * 100 / len(zipcode5))) return df -def fix_date(df): +def fix_date(df, logger): """ Remove invalid dates and select correct test date to use. 
@@ -98,15 +99,15 @@ def fix_date(df): df.insert(2, "timestamp", df["TestDate"]) mask = df["TestDate"] <= df["StorageDate"] - print("Removing %.2f%% of unusual data" % ((len(df) - np.sum(mask)) * 100 / len(df))) + logger.info(f"Removing {((len(df) - np.sum(mask)) * 100 / len(df)):.2f} of unusual data") df = df[mask] mask = df["StorageDate"] - df["TestDate"] > pd.Timedelta(days=90) - print("Fixing %.2f%% of outdated data" % (np.sum(mask) * 100 / len(df))) + logger.info(f"Fixing {(np.sum(mask) * 100 / len(df)):.2f} of outdated data") df["timestamp"].values[mask] = df["StorageDate"].values[mask] return df -def preprocess_new_data(start_date, end_date, params, test_mode): +def preprocess_new_data(start_date, end_date, params, test_mode, logger): """ Pull and pre-process Quidel Covid Test data. @@ -123,6 +124,8 @@ def preprocess_new_data(start_date, end_date, params, test_mode): read from params.json test_mode: bool pull raw data from s3 or not + logger: logging.Logger + The structured logger. output: df: pd.DataFrame time_flag: datetime.date: @@ -144,7 +147,7 @@ def preprocess_new_data(start_date, end_date, params, test_mode): aws_secret_access_key=aws_secret_access_key) bucket = s3.Bucket(bucket_name) # Get new data from s3 - df, time_flag = get_from_s3(start_date, end_date, bucket) + df, time_flag = get_from_s3(start_date, end_date, bucket, logger) # No new data can be pulled if time_flag is None: @@ -154,7 +157,7 @@ def preprocess_new_data(start_date, end_date, params, test_mode): df = fix_zipcode(df) # Create a column CanonicalDate according to StarageDate and TestDate - df = fix_date(df) + df = fix_date(df, logger) # Compute overallPositive overall_pos = df[df["OverallResult"] == "positive"].groupby( @@ -197,7 +200,7 @@ def check_intermediate_file(cache_dir, pull_start_date): return previous_df, pull_start_date return None, pull_start_date -def pull_quidel_covidtest(params): +def pull_quidel_covidtest(params, logger): """Pull the quidel covid test data. Conditionally merge new data with historical data from ./cache. @@ -205,10 +208,8 @@ def pull_quidel_covidtest(params): Parameters: params: dict including all the information read from params.json - end_from_today_minus: int - report data until - X days - export_day_range: int - number of dates to report + logger: logging.Logger + The structured logger. Returns: DataFrame: @@ -237,7 +238,7 @@ def pull_quidel_covidtest(params): # Pull data from the file at 5 digit zipcode level # Use _end_date to check the most recent date that we received data df, _end_date = preprocess_new_data( - pull_start_date, pull_end_date, params, test_mode) + pull_start_date, pull_end_date, params, test_mode, logger) # Utilize previously stored data if previous_df is not None: diff --git a/quidel_covidtest/delphi_quidel_covidtest/run.py b/quidel_covidtest/delphi_quidel_covidtest/run.py index d82f80135..5f084440c 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/run.py +++ b/quidel_covidtest/delphi_quidel_covidtest/run.py @@ -76,9 +76,9 @@ def run_module(params: Dict[str, Any]): export_day_range = params["indicator"]["export_day_range"] # Pull data and update export date - df, _end_date = pull_quidel_covidtest(params["indicator"]) + df, _end_date = pull_quidel_covidtest(params["indicator"], logger) if _end_date is None: - print("The data is up-to-date. Currently, no new data to be ingested.") + logger.info("The data is up-to-date. 
Currently, no new data to be ingested.") return export_end_date = check_export_end_date(export_end_date, _end_date, END_FROM_TODAY_MINUS) @@ -98,7 +98,9 @@ def run_module(params: Dict[str, Any]): geo_data, res_key = geo_map(geo_res, data) geo_groups = geo_data.groupby(res_key) for sensor in sensors: - print(geo_res, sensor) + logger.info("Generating signal and exporting to CSV", + geo_res=geo_res, + sensor=sensor) if sensor.endswith(SMOOTHED_POSITIVE): smoothers[sensor] = smoothers.pop(SMOOTHED_POSITIVE) elif sensor.endswith(RAW_POSITIVE): @@ -125,7 +127,9 @@ def run_module(params: Dict[str, Any]): for geo_res in PARENT_GEO_RESOLUTIONS: geo_data, res_key = geo_map(geo_res, data) for sensor in sensors: - print(geo_res, sensor) + logger.info("Generating signal and exporting to CSV", + geo_res=geo_res, + sensor=sensor) res_df = generate_sensor_for_parent_geo( geo_groups, geo_data, res_key, smooth=smoothers[sensor][1], device=smoothers[sensor][0], first_date=first_date, diff --git a/safegraph_patterns/delphi_safegraph_patterns/process.py b/safegraph_patterns/delphi_safegraph_patterns/process.py index 1445ce028..330cf6762 100644 --- a/safegraph_patterns/delphi_safegraph_patterns/process.py +++ b/safegraph_patterns/delphi_safegraph_patterns/process.py @@ -125,7 +125,7 @@ def aggregate(df, metric, geo_res): return df.rename({geo_key: "geo_id"}, axis=1) def process(fname, sensors, metrics, geo_resolutions, - export_dir, brand_df, stats): + export_dir, brand_df, stats, logger): """ Process an input census block group-level CSV and export it. @@ -135,16 +135,20 @@ def process(fname, sensors, metrics, geo_resolutions, ---------- fname: str Input filename. - metrics: List[Tuple[str, bool]] - List of (metric_name, wip). sensors: List[str] List of (sensor) + metrics: List[Tuple[str, bool]] + List of (metric_name, wip). geo_resolutions: List[str] List of geo resolutions to export the data. + export_dir: str + The directory to export files to. brand_df: pd.DataFrame mapping info from naics_code to safegraph_brand_id stats: List[Tuple[datetime, int]] List to which we will add (max export date, number of export dates) + logger: logging.Logger + The structured logger. 
Returns ------- @@ -164,7 +168,7 @@ def process(fname, sensors, metrics, geo_resolutions, usecols=used_cols, parse_dates=["date_range_start", "date_range_end"]) dfs = construct_signals(df, metric_names, naics_codes, brand_df) - print("Finished pulling data from " + fname) + logger.info("Finished pulling data.", filename=fname) else: files = glob.glob(f'{fname}/**/*.csv.gz', recursive=True) dfs_dict = {"bars_visit": [], "restaurants_visit": []} @@ -180,9 +184,11 @@ def process(fname, sensors, metrics, geo_resolutions, ).groupby(["timestamp", "zip"]).sum().reset_index() dfs["restaurants_visit"] = pd.concat(dfs_dict["restaurants_visit"] ).groupby(["timestamp", "zip"]).sum().reset_index() - print("Finished pulling data from " + fname) + logger.info("Finished pulling data.", filename=fname) for geo_res, sensor in product(geo_resolutions, sensors): for metric, wip in zip(metric_names, wips): + logger.info("Generating signal and exporting to CSV", + geo_res=geo_res, metric=metric, sensor=sensor) df_export = aggregate(dfs[metric], metric, geo_res) df_export["val"] = df_export["_".join([metric, sensor])] df_export["se"] = np.nan diff --git a/safegraph_patterns/delphi_safegraph_patterns/run.py b/safegraph_patterns/delphi_safegraph_patterns/run.py index ffb0e4eb7..6eb474b9b 100644 --- a/safegraph_patterns/delphi_safegraph_patterns/run.py +++ b/safegraph_patterns/delphi_safegraph_patterns/run.py @@ -101,7 +101,8 @@ def run_module(params): sensors=SENSORS, geo_resolutions=GEO_RESOLUTIONS, export_dir=export_dir, - stats=stats + stats=stats, + logger=logger, ) with mp.Pool(n_core) as pool: diff --git a/usafacts/delphi_usafacts/run.py b/usafacts/delphi_usafacts/run.py index 90c11e28a..4c659679a 100644 --- a/usafacts/delphi_usafacts/run.py +++ b/usafacts/delphi_usafacts/run.py @@ -98,7 +98,7 @@ def run_module(params: Dict[str, Dict[str, Any]]): METRICS, GEO_RESOLUTIONS, SENSORS, SMOOTHERS): if "cumulative" in sensor and "seven_day_average" in smoother: continue - logger.info("generating signal and exporting to CSV", + logger.info("Generating signal and exporting to CSV", geo_res = geo_res, metric = metric, sensor = sensor, From f2f55d3b216257f150fea5e9113a38db433d51f4 Mon Sep 17 00:00:00 2001 From: alexcoda Date: Sun, 19 Sep 2021 12:00:16 -0700 Subject: [PATCH 03/12] lint --- nchs_mortality/delphi_nchs_mortality/run.py | 2 +- quidel/delphi_quidel/pull.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nchs_mortality/delphi_nchs_mortality/run.py b/nchs_mortality/delphi_nchs_mortality/run.py index 1673e79c1..1cf3d36d5 100644 --- a/nchs_mortality/delphi_nchs_mortality/run.py +++ b/nchs_mortality/delphi_nchs_mortality/run.py @@ -110,7 +110,7 @@ def run_module(params: Dict[str, Any]): # - Uploads changed files to S3 # - Does not export any issues into receiving if "archive" in params: - arch_diffs(params, daily_arch_diff) + arch_diffs(params, daily_arch_diff, logger) elapsed_time_in_seconds = round(time.time() - start_time, 2) min_max_date = stats and min(s[0] for s in stats) diff --git a/quidel/delphi_quidel/pull.py b/quidel/delphi_quidel/pull.py index 3a0c6f25f..1643f9304 100644 --- a/quidel/delphi_quidel/pull.py +++ b/quidel/delphi_quidel/pull.py @@ -133,7 +133,7 @@ def get_from_email(column_names, start_dates, end_dates, mail_server, if not whether_in_range: continue - logger.info(f"Pulling data", test=test, date=search_date.date()) + logger.info("Pulling data", test=test, date=search_date.date()) toread = io.BytesIO() toread.write(att.payload) toread.seek(0) # reset the pointer From 
23c134ad77b3f0dcd44d5078a0d5521410f3fad9 Mon Sep 17 00:00:00 2001 From: alexcoda Date: Sun, 19 Sep 2021 12:04:01 -0700 Subject: [PATCH 04/12] lint --- combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py b/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py index bddd1833f..7fbaa2898 100755 --- a/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py +++ b/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py @@ -134,7 +134,8 @@ def get_updated_dates(signal, geo, date_range, issue_range=None, fetcher=covidca return unique_dates -def combine_usafacts_and_jhu(signal, geo, date_range, logger, issue_range=None, fetcher=covidcast.signal): +def combine_usafacts_and_jhu(signal, geo, date_range, logger, + issue_range=None, fetcher=covidcast.signal): """Add rows for PR from JHU signals to USA-FACTS signals. For hhs and nation, fetch the county `num` data so we can compute the proportions correctly From f5825512f8b98fb9b50eee2d7cffde1d94fbaf83 Mon Sep 17 00:00:00 2001 From: alexcoda Date: Sun, 19 Sep 2021 12:10:44 -0700 Subject: [PATCH 05/12] Fix missing logger in tests --- quidel/tests/test_pull.py | 5 ++++- quidel_covidtest/tests/test_pull.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/quidel/tests/test_pull.py b/quidel/tests/test_pull.py index 435624f7e..bf27b0bd6 100644 --- a/quidel/tests/test_pull.py +++ b/quidel/tests/test_pull.py @@ -1,3 +1,4 @@ +import logging from datetime import datetime import pandas as pd @@ -36,6 +37,8 @@ def test_fix_date(self): class TestingPullData: def test_pull_quidel_data(self): + logger = logging.Logger("test_logger") + dfs, _ = pull_quidel_data({ "static_file_dir": "../static", "input_cache_dir": "./cache", @@ -49,7 +52,7 @@ def test_pull_quidel_data(self): "sender": "", "wip_signal": [""], "test_mode": True - }) + }, logger) # For covid_ag df = dfs["covid_ag"] diff --git a/quidel_covidtest/tests/test_pull.py b/quidel_covidtest/tests/test_pull.py index 48bb48d14..acdae32fe 100644 --- a/quidel_covidtest/tests/test_pull.py +++ b/quidel_covidtest/tests/test_pull.py @@ -1,3 +1,4 @@ +import logging from datetime import datetime import pandas as pd @@ -36,6 +37,8 @@ def test_fix_date(self): class TestingPullData: def test_pull_quidel_covidtest(self): + logger = logging.Logger("test_logger") + df, _ = pull_quidel_covidtest({ "static_file_dir": "../static", "input_cache_dir": "./cache", @@ -50,7 +53,7 @@ def test_pull_quidel_covidtest(self): "bucket_name": "", "wip_signal": "", "test_mode": True - }) + }, logger) first_date = df["timestamp"].min().date() last_date = df["timestamp"].max().date() From bb3121d36ba9af18eac334d174ba03a3729db236 Mon Sep 17 00:00:00 2001 From: alexcoda Date: Sun, 19 Sep 2021 12:22:02 -0700 Subject: [PATCH 06/12] Fix missing logger in tests --- combo_cases_and_deaths/tests/test_run.py | 14 ++++++++------ quidel/tests/test_pull.py | 7 +++---- quidel_covidtest/tests/test_pull.py | 8 ++++---- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/combo_cases_and_deaths/tests/test_run.py b/combo_cases_and_deaths/tests/test_run.py index 8d03627d4..3432d22d1 100644 --- a/combo_cases_and_deaths/tests/test_run.py +++ b/combo_cases_and_deaths/tests/test_run.py @@ -1,4 +1,5 @@ """Tests for running combo cases and deaths indicator.""" +import logging from datetime import date from itertools import product import os @@ -17,6 +18,7 @@ COLUMN_MAPPING) from 
delphi_combo_cases_and_deaths.constants import METRICS, SMOOTH_TYPES, SENSORS +LOGGER = logging.Logger("test_logger") def test_issue_dates(): """The smoothed value for a particular date is computed from the raw @@ -98,7 +100,7 @@ def make_mock(geo): ("1 1", 4, 1 if geo in ["nation", "hhs"] else 2), ("0 0", 2, 0) ]: - df = combine_usafacts_and_jhu("", geo, date_range, fetcher=mock_covidcast_signal) + df = combine_usafacts_and_jhu("", geo, date_range, LOGGER, fetcher=mock_covidcast_signal) assert df.size == expected_size * len(COLUMN_MAPPING), f""" Wrong number of rows in combined data frame for the number of available signals. @@ -126,7 +128,7 @@ def test_multiple_issues(mock_covidcast_signal): }), None ] * 2 - result = combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), fetcher=mock_covidcast_signal) + result = combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal) pd.testing.assert_frame_equal( result, pd.DataFrame( @@ -186,7 +188,7 @@ def test_combine_usafacts_and_jhu_special_geos(mock_covidcast_signal): ] * 6 # each call to combine_usafacts_and_jhu makes (2 + 2 * len(unique_timestamps)) = 12 calls to the fetcher pd.testing.assert_frame_equal( - combine_usafacts_and_jhu("confirmed_incidence_num", "nation", date_range=(0, 1), fetcher=mock_covidcast_signal), + combine_usafacts_and_jhu("confirmed_incidence_num", "nation", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal), pd.DataFrame({"timestamp": [20200101], "geo_id": ["us"], "val": [50 + 100 + 200], @@ -194,7 +196,7 @@ def test_combine_usafacts_and_jhu_special_geos(mock_covidcast_signal): "sample_size": [None]}) ) pd.testing.assert_frame_equal( - combine_usafacts_and_jhu("confirmed_incidence_prop", "nation", date_range=(0, 1), fetcher=mock_covidcast_signal), + combine_usafacts_and_jhu("confirmed_incidence_prop", "nation", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal), pd.DataFrame({"timestamp": [20200101], "geo_id": ["us"], "val": [(50 + 100 + 200) / (4903185 + 3723066) * 100000], @@ -202,7 +204,7 @@ def test_combine_usafacts_and_jhu_special_geos(mock_covidcast_signal): "sample_size": [None]}) ) pd.testing.assert_frame_equal( - combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), fetcher=mock_covidcast_signal), + combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal), pd.DataFrame({"geo_id": ["01000", "01001", "72001"], "val": [50, 100, 200], "timestamp": [20200101, 20200101, 20200101]}, @@ -229,7 +231,7 @@ def test_no_nation_jhu(mock_covidcast_signal): "value": [1], "timestamp": [20200101]}) ] - result = combine_usafacts_and_jhu("_num", "nation", date_range=(0, 1), fetcher=mock_covidcast_signal) + result = combine_usafacts_and_jhu("_num", "nation", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal) assert mock_covidcast_signal.call_args_list[-1] == call( "jhu-csse", diff --git a/quidel/tests/test_pull.py b/quidel/tests/test_pull.py index bf27b0bd6..1f36fb85e 100644 --- a/quidel/tests/test_pull.py +++ b/quidel/tests/test_pull.py @@ -14,6 +14,7 @@ END_FROM_TODAY_MINUS = 5 EXPORT_DAY_RANGE = 40 +LOGGER = logging.Logger("test_logger") class TestFixData: def test_fix_zipcode(self): @@ -29,7 +30,7 @@ def test_fix_date(self): datetime(2020, 6, 14), datetime(2020, 7, 10)], "TestDate":[datetime(2020, 1, 19), datetime(2020, 6, 10), datetime(2020, 6, 11), datetime(2020, 7, 2)]}) - df = fix_date(df) + df 
= fix_date(df, LOGGER) assert set(df["timestamp"]) == set([datetime(2020, 5, 19), datetime(2020, 6, 11), datetime(2020, 7, 2)]) @@ -37,8 +38,6 @@ def test_fix_date(self): class TestingPullData: def test_pull_quidel_data(self): - logger = logging.Logger("test_logger") - dfs, _ = pull_quidel_data({ "static_file_dir": "../static", "input_cache_dir": "./cache", @@ -52,7 +51,7 @@ def test_pull_quidel_data(self): "sender": "", "wip_signal": [""], "test_mode": True - }, logger) + }, LOGGER) # For covid_ag df = dfs["covid_ag"] diff --git a/quidel_covidtest/tests/test_pull.py b/quidel_covidtest/tests/test_pull.py index acdae32fe..01df4c7c6 100644 --- a/quidel_covidtest/tests/test_pull.py +++ b/quidel_covidtest/tests/test_pull.py @@ -15,6 +15,8 @@ END_FROM_TODAY_MINUS = 5 EXPORT_DAY_RANGE = 40 +LOGGER = logging.Logger("test_logger") + class TestFixData: def test_fix_zipcode(self): @@ -29,7 +31,7 @@ def test_fix_date(self): datetime(2020, 6, 14), datetime(2020, 7, 10)], "TestDate":[datetime(2020, 1, 19), datetime(2020, 6, 10), datetime(2020, 6, 11), datetime(2020, 7, 2)]}) - df = fix_date(df) + df = fix_date(df, LOGGER) assert set(df["timestamp"]) == set([datetime(2020, 5, 19), datetime(2020, 6, 11), datetime(2020, 7, 2)]) @@ -37,8 +39,6 @@ def test_fix_date(self): class TestingPullData: def test_pull_quidel_covidtest(self): - logger = logging.Logger("test_logger") - df, _ = pull_quidel_covidtest({ "static_file_dir": "../static", "input_cache_dir": "./cache", @@ -53,7 +53,7 @@ def test_pull_quidel_covidtest(self): "bucket_name": "", "wip_signal": "", "test_mode": True - }, logger) + }, LOGGER) first_date = df["timestamp"].min().date() last_date = df["timestamp"].max().date() From 39e7dff42c94d6c4cc9eb14413e93933f3a6d9ae Mon Sep 17 00:00:00 2001 From: alexcoda Date: Sun, 19 Sep 2021 12:33:18 -0700 Subject: [PATCH 07/12] Instantiate logger correctly in tests --- combo_cases_and_deaths/tests/test_run.py | 14 +++++++------- quidel/tests/test_pull.py | 6 +++--- quidel_covidtest/tests/test_pull.py | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/combo_cases_and_deaths/tests/test_run.py b/combo_cases_and_deaths/tests/test_run.py index 3432d22d1..e83af9abb 100644 --- a/combo_cases_and_deaths/tests/test_run.py +++ b/combo_cases_and_deaths/tests/test_run.py @@ -18,7 +18,7 @@ COLUMN_MAPPING) from delphi_combo_cases_and_deaths.constants import METRICS, SMOOTH_TYPES, SENSORS -LOGGER = logging.Logger("test_logger") +TEST_LOGGER = logging.getLogger() def test_issue_dates(): """The smoothed value for a particular date is computed from the raw @@ -100,7 +100,7 @@ def make_mock(geo): ("1 1", 4, 1 if geo in ["nation", "hhs"] else 2), ("0 0", 2, 0) ]: - df = combine_usafacts_and_jhu("", geo, date_range, LOGGER, fetcher=mock_covidcast_signal) + df = combine_usafacts_and_jhu("", geo, date_range, TEST_LOGGER, fetcher=mock_covidcast_signal) assert df.size == expected_size * len(COLUMN_MAPPING), f""" Wrong number of rows in combined data frame for the number of available signals. 
@@ -128,7 +128,7 @@ def test_multiple_issues(mock_covidcast_signal): }), None ] * 2 - result = combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal) + result = combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), logger=TEST_LOGGER, fetcher=mock_covidcast_signal) pd.testing.assert_frame_equal( result, pd.DataFrame( @@ -188,7 +188,7 @@ def test_combine_usafacts_and_jhu_special_geos(mock_covidcast_signal): ] * 6 # each call to combine_usafacts_and_jhu makes (2 + 2 * len(unique_timestamps)) = 12 calls to the fetcher pd.testing.assert_frame_equal( - combine_usafacts_and_jhu("confirmed_incidence_num", "nation", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal), + combine_usafacts_and_jhu("confirmed_incidence_num", "nation", date_range=(0, 1), logger=TEST_LOGGER, fetcher=mock_covidcast_signal), pd.DataFrame({"timestamp": [20200101], "geo_id": ["us"], "val": [50 + 100 + 200], @@ -196,7 +196,7 @@ def test_combine_usafacts_and_jhu_special_geos(mock_covidcast_signal): "sample_size": [None]}) ) pd.testing.assert_frame_equal( - combine_usafacts_and_jhu("confirmed_incidence_prop", "nation", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal), + combine_usafacts_and_jhu("confirmed_incidence_prop", "nation", date_range=(0, 1), logger=TEST_LOGGER, fetcher=mock_covidcast_signal), pd.DataFrame({"timestamp": [20200101], "geo_id": ["us"], "val": [(50 + 100 + 200) / (4903185 + 3723066) * 100000], @@ -204,7 +204,7 @@ def test_combine_usafacts_and_jhu_special_geos(mock_covidcast_signal): "sample_size": [None]}) ) pd.testing.assert_frame_equal( - combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal), + combine_usafacts_and_jhu("confirmed_incidence_num", "county", date_range=(0, 1), logger=TEST_LOGGER, fetcher=mock_covidcast_signal), pd.DataFrame({"geo_id": ["01000", "01001", "72001"], "val": [50, 100, 200], "timestamp": [20200101, 20200101, 20200101]}, @@ -231,7 +231,7 @@ def test_no_nation_jhu(mock_covidcast_signal): "value": [1], "timestamp": [20200101]}) ] - result = combine_usafacts_and_jhu("_num", "nation", date_range=(0, 1), logger=LOGGER, fetcher=mock_covidcast_signal) + result = combine_usafacts_and_jhu("_num", "nation", date_range=(0, 1), logger=TEST_LOGGER, fetcher=mock_covidcast_signal) assert mock_covidcast_signal.call_args_list[-1] == call( "jhu-csse", diff --git a/quidel/tests/test_pull.py b/quidel/tests/test_pull.py index 1f36fb85e..17f596ce9 100644 --- a/quidel/tests/test_pull.py +++ b/quidel/tests/test_pull.py @@ -14,7 +14,7 @@ END_FROM_TODAY_MINUS = 5 EXPORT_DAY_RANGE = 40 -LOGGER = logging.Logger("test_logger") +TEST_LOGGER = logging.getLogger() class TestFixData: def test_fix_zipcode(self): @@ -30,7 +30,7 @@ def test_fix_date(self): datetime(2020, 6, 14), datetime(2020, 7, 10)], "TestDate":[datetime(2020, 1, 19), datetime(2020, 6, 10), datetime(2020, 6, 11), datetime(2020, 7, 2)]}) - df = fix_date(df, LOGGER) + df = fix_date(df, TEST_LOGGER) assert set(df["timestamp"]) == set([datetime(2020, 5, 19), datetime(2020, 6, 11), datetime(2020, 7, 2)]) @@ -51,7 +51,7 @@ def test_pull_quidel_data(self): "sender": "", "wip_signal": [""], "test_mode": True - }, LOGGER) + }, TEST_LOGGER) # For covid_ag df = dfs["covid_ag"] diff --git a/quidel_covidtest/tests/test_pull.py b/quidel_covidtest/tests/test_pull.py index 01df4c7c6..17ddbb6fd 100644 --- a/quidel_covidtest/tests/test_pull.py +++ 
b/quidel_covidtest/tests/test_pull.py @@ -15,7 +15,7 @@ END_FROM_TODAY_MINUS = 5 EXPORT_DAY_RANGE = 40 -LOGGER = logging.Logger("test_logger") +TEST_LOGGER = logging.getLogger() class TestFixData: def test_fix_zipcode(self): @@ -31,7 +31,7 @@ def test_fix_date(self): datetime(2020, 6, 14), datetime(2020, 7, 10)], "TestDate":[datetime(2020, 1, 19), datetime(2020, 6, 10), datetime(2020, 6, 11), datetime(2020, 7, 2)]}) - df = fix_date(df, LOGGER) + df = fix_date(df, TEST_LOGGER) assert set(df["timestamp"]) == set([datetime(2020, 5, 19), datetime(2020, 6, 11), datetime(2020, 7, 2)]) @@ -53,7 +53,7 @@ def test_pull_quidel_covidtest(self): "bucket_name": "", "wip_signal": "", "test_mode": True - }, LOGGER) + }, TEST_LOGGER) first_date = df["timestamp"].min().date() last_date = df["timestamp"].max().date() From af7a90ebed462e9a3653325d4ffea7d85396e8cd Mon Sep 17 00:00:00 2001 From: alexcoda Date: Sun, 19 Sep 2021 12:38:08 -0700 Subject: [PATCH 08/12] Fix error check --- quidel/delphi_quidel/data_tools.py | 4 ++-- quidel_covidtest/delphi_quidel_covidtest/data_tools.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/quidel/delphi_quidel/data_tools.py b/quidel/delphi_quidel/data_tools.py index c0ebeb750..92afb2159 100644 --- a/quidel/delphi_quidel/data_tools.py +++ b/quidel/delphi_quidel/data_tools.py @@ -287,9 +287,9 @@ def raw_tests_per_device(devices, tests, min_obs): """ devices = devices.astype(float) tests = tests.astype(float) - if np.any(np.isnan(devices)) or np.any(d < 0 for d in devices): + if np.any(np.isnan(devices)) or np.any(devices < 0): raise ValueError("devices should be non-negative with no np.nan") - if np.any(np.isnan(tests)) or np.any(d < 0 for d in tests): + if np.any(np.isnan(tests)) or np.any(tests < 0): raise ValueError("tests should be non-negative with no np.nan") if min_obs <= 0: raise ValueError('min_obs should be positive') diff --git a/quidel_covidtest/delphi_quidel_covidtest/data_tools.py b/quidel_covidtest/delphi_quidel_covidtest/data_tools.py index fac0b58b2..f89a353ed 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/data_tools.py +++ b/quidel_covidtest/delphi_quidel_covidtest/data_tools.py @@ -293,9 +293,9 @@ def raw_tests_per_device(devices, tests, min_obs): """ devices = devices.astype(float) tests = tests.astype(float) - if np.any(np.isnan(devices)) or np.any(d < 0 for d in devices): + if np.any(np.isnan(devices)) or np.any(devices < 0): raise ValueError("devices should be non-negative with no np.nan") - if np.any(np.isnan(tests)) or np.any(d < 0 for d in tests): + if np.any(np.isnan(tests)) or np.any(tests < 0): raise ValueError("tests should be non-negative with no np.nan") if min_obs <= 0: raise ValueError('min_obs should be positive') From 5f5b292e2c661198fb2a77f2504e194cf884801f Mon Sep 17 00:00:00 2001 From: Alex Coda Date: Tue, 21 Sep 2021 17:34:46 -0700 Subject: [PATCH 09/12] Update quidel/delphi_quidel/pull.py Co-authored-by: Katie Mazaitis --- quidel/delphi_quidel/pull.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quidel/delphi_quidel/pull.py b/quidel/delphi_quidel/pull.py index 1643f9304..ffbba3283 100644 --- a/quidel/delphi_quidel/pull.py +++ b/quidel/delphi_quidel/pull.py @@ -176,7 +176,7 @@ def fix_date(df, logger): df.insert(2, "timestamp", df["TestDate"]) mask = df["TestDate"] <= df["StorageDate"] - logger.info(f"Removing {((len(df) - np.sum(mask)) * 100 / len(df)):.2f} of unusual data") + logger.info(f"Removing {((len(df) - np.sum(mask)) * 100 / len(df)):.2f}% of unusual data") df = df[mask] 
mask = df["StorageDate"] - df["TestDate"] > pd.Timedelta(days=90) From 18ae0729b9450e9159fef5faaeea9152ff28d98e Mon Sep 17 00:00:00 2001 From: Alex Coda Date: Tue, 21 Sep 2021 17:34:50 -0700 Subject: [PATCH 10/12] Update quidel/delphi_quidel/pull.py Co-authored-by: Katie Mazaitis --- quidel/delphi_quidel/pull.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quidel/delphi_quidel/pull.py b/quidel/delphi_quidel/pull.py index ffbba3283..f168b3355 100644 --- a/quidel/delphi_quidel/pull.py +++ b/quidel/delphi_quidel/pull.py @@ -180,7 +180,7 @@ def fix_date(df, logger): df = df[mask] mask = df["StorageDate"] - df["TestDate"] > pd.Timedelta(days=90) - logger.info(f"Fixing {(np.sum(mask) * 100 / len(df)):.2f} of outdated data") + logger.info(f"Fixing {(np.sum(mask) * 100 / len(df)):.2f}% of outdated data") df["timestamp"].values[mask] = df["StorageDate"].values[mask] return df From 1aa181c3151baf5d431f1c3a772634f7a37311f4 Mon Sep 17 00:00:00 2001 From: Alex Coda Date: Tue, 21 Sep 2021 17:34:57 -0700 Subject: [PATCH 11/12] Update quidel_covidtest/delphi_quidel_covidtest/pull.py Co-authored-by: Katie Mazaitis --- quidel_covidtest/delphi_quidel_covidtest/pull.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quidel_covidtest/delphi_quidel_covidtest/pull.py b/quidel_covidtest/delphi_quidel_covidtest/pull.py index 9ce036e10..b5f9eb9d0 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/pull.py +++ b/quidel_covidtest/delphi_quidel_covidtest/pull.py @@ -99,7 +99,7 @@ def fix_date(df, logger): df.insert(2, "timestamp", df["TestDate"]) mask = df["TestDate"] <= df["StorageDate"] - logger.info(f"Removing {((len(df) - np.sum(mask)) * 100 / len(df)):.2f} of unusual data") + logger.info(f"Removing {((len(df) - np.sum(mask)) * 100 / len(df)):.2f}% of unusual data") df = df[mask] mask = df["StorageDate"] - df["TestDate"] > pd.Timedelta(days=90) From b6361dec6ff234a170974ffe59543b3ed71e181e Mon Sep 17 00:00:00 2001 From: Alex Coda Date: Tue, 21 Sep 2021 17:35:03 -0700 Subject: [PATCH 12/12] Update quidel_covidtest/delphi_quidel_covidtest/pull.py Co-authored-by: Katie Mazaitis --- quidel_covidtest/delphi_quidel_covidtest/pull.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quidel_covidtest/delphi_quidel_covidtest/pull.py b/quidel_covidtest/delphi_quidel_covidtest/pull.py index b5f9eb9d0..3efa9ed23 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/pull.py +++ b/quidel_covidtest/delphi_quidel_covidtest/pull.py @@ -103,7 +103,7 @@ def fix_date(df, logger): df = df[mask] mask = df["StorageDate"] - df["TestDate"] > pd.Timedelta(days=90) - logger.info(f"Fixing {(np.sum(mask) * 100 / len(df)):.2f} of outdated data") + logger.info(f"Fixing {(np.sum(mask) * 100 / len(df)):.2f}% of outdated data") df["timestamp"].values[mask] = df["StorageDate"].values[mask] return df