From 43885e281ec40c86c90b27a5f0a3defd7d2087d4 Mon Sep 17 00:00:00 2001 From: Mitch-Delphi Date: Fri, 16 Dec 2022 10:37:48 -0500 Subject: [PATCH 1/8] Fix validation steps for no-output case --- .../delphi_utils/validator/static.py | 35 ++++++++++--------- .../delphi_utils/validator/utils.py | 9 ++--- .../delphi_dsew_community_profile/run.py | 3 ++ 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/static.py b/_delphi_utils_python/delphi_utils/validator/static.py index 48b17b888..669b333c2 100644 --- a/_delphi_utils_python/delphi_utils/validator/static.py +++ b/_delphi_utils_python/delphi_utils/validator/static.py @@ -93,25 +93,26 @@ def check_missing_date_files(self, daily_filenames, report): # Create set of all dates seen in CSV names. unique_dates = {datetime.strptime( daily_filename[0][0:8], '%Y%m%d').date() for daily_filename in daily_filenames} - - # Diff expected and observed dates. - expected_dates = self.params.time_window.date_seq - - if len(self.params.max_expected_lag) == 0: - max_expected_lag_overall = 10 - else: - max_expected_lag_overall = max(self.params.max_expected_lag.values()) - - # Only check for date if it should definitely be present, - # i.e if it is more than max_expected_lag since the checking date - expected_dates = [date for date in expected_dates if + # Check if there any dates in unique_dates before checking if any dates are missing + # Validation will be skipped if there is no CSV export + if len(unique_dates) > 0: + # Diff expected and observed dates. + expected_dates = self.params.time_window.date_seq + if len(self.params.max_expected_lag) == 0: + max_expected_lag_overall = 10 + else: + max_expected_lag_overall = max(self.params.max_expected_lag.values()) + + # Only check for date if it should definitely be present, + # i.e if it is more than max_expected_lag since the checking date + expected_dates = [date for date in expected_dates if ((datetime.today().date() - date).days) > max_expected_lag_overall] - check_dateholes = list(set(expected_dates).difference(unique_dates)) - check_dateholes.sort() + check_dateholes = list(set(expected_dates).difference(unique_dates)) + check_dateholes.sort() - if check_dateholes: - report.add_raised_error( - ValidationFailure("check_missing_date_files", + if check_dateholes: + report.add_raised_error( + ValidationFailure("check_missing_date_files", message="Missing dates are observed; if these dates are already " "in the API they would not be updated")) diff --git a/_delphi_utils_python/delphi_utils/validator/utils.py b/_delphi_utils_python/delphi_utils/validator/utils.py index eb2b694e1..fa3f0ca4d 100644 --- a/_delphi_utils_python/delphi_utils/validator/utils.py +++ b/_delphi_utils_python/delphi_utils/validator/utils.py @@ -76,10 +76,11 @@ def aggregate_frames(frames_list): df['time_value'] = datetime.strptime( match.groupdict()['date'], "%Y%m%d").date() df['signal'] = match.groupdict()['signal'] - - all_frames.append(df) - - return pd.concat(all_frames).reset_index(drop=True) + # Check if df is empty before concat + if len(df) > 0: + all_frames.append(df) + all_frames = pd.concat(all_frames).reset_index(drop=True) + return all_frames def lag_converter(lag_dict): """Convert a dictionary of lag values into the proper format. diff --git a/dsew_community_profile/delphi_dsew_community_profile/run.py b/dsew_community_profile/delphi_dsew_community_profile/run.py index 6de1443a7..ba4041630 100644 --- a/dsew_community_profile/delphi_dsew_community_profile/run.py +++ b/dsew_community_profile/delphi_dsew_community_profile/run.py @@ -125,3 +125,6 @@ def replace_date_param(p): csv_export_count = csv_export_count, max_lag_in_days = max_lag_in_days, oldest_final_export_date = formatted_min_max_date) + # Print warning if no CSV export + if csv_export_count == 0: + logger.warning("No CSV output - manual validation may be needed") From 1b9b95f41126b31df8c777a83f0773bcb0517238 Mon Sep 17 00:00:00 2001 From: Mitch-Delphi Date: Tue, 20 Dec 2022 13:33:23 -0500 Subject: [PATCH 2/8] Empty dir check made into a warning in validation --- _delphi_utils_python/delphi_utils/runner.py | 1 - _delphi_utils_python/delphi_utils/validator/static.py | 6 +++++- _delphi_utils_python/delphi_utils/validator/utils.py | 9 ++++----- _delphi_utils_python/delphi_utils/validator/validate.py | 6 +++++- _delphi_utils_python/tests/validator/test_static.py | 4 ++-- .../delphi_dsew_community_profile/run.py | 3 --- 6 files changed, 16 insertions(+), 13 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/runner.py b/_delphi_utils_python/delphi_utils/runner.py index c86177c58..d21a67be5 100644 --- a/_delphi_utils_python/delphi_utils/runner.py +++ b/_delphi_utils_python/delphi_utils/runner.py @@ -71,7 +71,6 @@ def run_indicator_pipeline(indicator_fn: Callable[[Params], None], if "delivery" in params: transfer_files() - if __name__ == "__main__": parser = ap.ArgumentParser() parser.add_argument("indicator_name", diff --git a/_delphi_utils_python/delphi_utils/validator/static.py b/_delphi_utils_python/delphi_utils/validator/static.py index 669b333c2..65f83c00b 100644 --- a/_delphi_utils_python/delphi_utils/validator/static.py +++ b/_delphi_utils_python/delphi_utils/validator/static.py @@ -95,7 +95,11 @@ def check_missing_date_files(self, daily_filenames, report): daily_filename[0][0:8], '%Y%m%d').date() for daily_filename in daily_filenames} # Check if there any dates in unique_dates before checking if any dates are missing # Validation will be skipped if there is no CSV export - if len(unique_dates) > 0: + if len(unique_dates) == 0: + report.add_raised_warning( + ValidationFailure("check_empty_filelist", + message="No files found in export directory")) + else: # Diff expected and observed dates. expected_dates = self.params.time_window.date_seq if len(self.params.max_expected_lag) == 0: diff --git a/_delphi_utils_python/delphi_utils/validator/utils.py b/_delphi_utils_python/delphi_utils/validator/utils.py index fa3f0ca4d..eb2b694e1 100644 --- a/_delphi_utils_python/delphi_utils/validator/utils.py +++ b/_delphi_utils_python/delphi_utils/validator/utils.py @@ -76,11 +76,10 @@ def aggregate_frames(frames_list): df['time_value'] = datetime.strptime( match.groupdict()['date'], "%Y%m%d").date() df['signal'] = match.groupdict()['signal'] - # Check if df is empty before concat - if len(df) > 0: - all_frames.append(df) - all_frames = pd.concat(all_frames).reset_index(drop=True) - return all_frames + + all_frames.append(df) + + return pd.concat(all_frames).reset_index(drop=True) def lag_converter(lag_dict): """Convert a dictionary of lag values into the proper format. diff --git a/_delphi_utils_python/delphi_utils/validator/validate.py b/_delphi_utils_python/delphi_utils/validator/validate.py index 9c4861b76..d03d7e3c2 100644 --- a/_delphi_utils_python/delphi_utils/validator/validate.py +++ b/_delphi_utils_python/delphi_utils/validator/validate.py @@ -58,6 +58,10 @@ def validate(self): frames_list = load_all_files(self.export_dir, self.time_window.start_date, self.time_window.end_date) self.static_validation.validate(frames_list, report) - all_frames = aggregate_frames(frames_list) + # Check if frames_list is empty before calling aggregate_frames + if len(frames_list) == 0: + all_frames = [] + else: + all_frames = aggregate_frames(frames_list) self.dynamic_validation.validate(all_frames, report) return report diff --git a/_delphi_utils_python/tests/validator/test_static.py b/_delphi_utils_python/tests/validator/test_static.py index bf270b4fd..b3ac91fc0 100644 --- a/_delphi_utils_python/tests/validator/test_static.py +++ b/_delphi_utils_python/tests/validator/test_static.py @@ -27,8 +27,8 @@ def test_empty_filelist(self): filenames = list() validator.check_missing_date_files(filenames, report) - assert len(report.raised_errors) == 1 - assert report.raised_errors[0].check_name == "check_missing_date_files" + assert len(report.raised_warnings) == 1 + assert report.raised_warnings[0].check_name == "check_empty_filelist" def test_same_day(self): params = { diff --git a/dsew_community_profile/delphi_dsew_community_profile/run.py b/dsew_community_profile/delphi_dsew_community_profile/run.py index ba4041630..6de1443a7 100644 --- a/dsew_community_profile/delphi_dsew_community_profile/run.py +++ b/dsew_community_profile/delphi_dsew_community_profile/run.py @@ -125,6 +125,3 @@ def replace_date_param(p): csv_export_count = csv_export_count, max_lag_in_days = max_lag_in_days, oldest_final_export_date = formatted_min_max_date) - # Print warning if no CSV export - if csv_export_count == 0: - logger.warning("No CSV output - manual validation may be needed") From a1f597b0165d70d11034d0e8439f013f8f6e83b2 Mon Sep 17 00:00:00 2001 From: Mitch-Delphi Date: Tue, 20 Dec 2022 13:58:38 -0500 Subject: [PATCH 3/8] Add better notes and tweak files check --- _delphi_utils_python/delphi_utils/runner.py | 1 + .../delphi_utils/validator/static.py | 13 +++++++------ _delphi_utils_python/tests/validator/test_static.py | 1 + 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/runner.py b/_delphi_utils_python/delphi_utils/runner.py index d21a67be5..c86177c58 100644 --- a/_delphi_utils_python/delphi_utils/runner.py +++ b/_delphi_utils_python/delphi_utils/runner.py @@ -71,6 +71,7 @@ def run_indicator_pipeline(indicator_fn: Callable[[Params], None], if "delivery" in params: transfer_files() + if __name__ == "__main__": parser = ap.ArgumentParser() parser.add_argument("indicator_name", diff --git a/_delphi_utils_python/delphi_utils/validator/static.py b/_delphi_utils_python/delphi_utils/validator/static.py index 65f83c00b..67c932c90 100644 --- a/_delphi_utils_python/delphi_utils/validator/static.py +++ b/_delphi_utils_python/delphi_utils/validator/static.py @@ -90,16 +90,17 @@ def check_missing_date_files(self, daily_filenames, report): Returns: - None """ - # Create set of all dates seen in CSV names. - unique_dates = {datetime.strptime( - daily_filename[0][0:8], '%Y%m%d').date() for daily_filename in daily_filenames} - # Check if there any dates in unique_dates before checking if any dates are missing - # Validation will be skipped if there is no CSV export - if len(unique_dates) == 0: + # Check to see if there are any files in the export directory + # Validator will throw a warning if the directory is empty + if len(daily_filenames) == 0: report.add_raised_warning( ValidationFailure("check_empty_filelist", message="No files found in export directory")) + # Check for missing date only happens when files are found else: + # Create set of all dates seen in CSV names. + unique_dates = {datetime.strptime( + daily_filename[0][0:8], '%Y%m%d').date() for daily_filename in daily_filenames} # Diff expected and observed dates. expected_dates = self.params.time_window.date_seq if len(self.params.max_expected_lag) == 0: diff --git a/_delphi_utils_python/tests/validator/test_static.py b/_delphi_utils_python/tests/validator/test_static.py index b3ac91fc0..dd8522565 100644 --- a/_delphi_utils_python/tests/validator/test_static.py +++ b/_delphi_utils_python/tests/validator/test_static.py @@ -27,6 +27,7 @@ def test_empty_filelist(self): filenames = list() validator.check_missing_date_files(filenames, report) + # Empty file list now triggers a raised warning and not an error in validation assert len(report.raised_warnings) == 1 assert report.raised_warnings[0].check_name == "check_empty_filelist" From bfb0215687abd74f7b9f6f358580ec855de92212 Mon Sep 17 00:00:00 2001 From: Mitch-Delphi Date: Tue, 20 Dec 2022 15:06:26 -0500 Subject: [PATCH 4/8] Empty filelist check now an error, suppress-able --- _delphi_utils_python/delphi_utils/validator/static.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/static.py b/_delphi_utils_python/delphi_utils/validator/static.py index 67c932c90..9946d5c79 100644 --- a/_delphi_utils_python/delphi_utils/validator/static.py +++ b/_delphi_utils_python/delphi_utils/validator/static.py @@ -91,9 +91,9 @@ def check_missing_date_files(self, daily_filenames, report): - None """ # Check to see if there are any files in the export directory - # Validator will throw a warning if the directory is empty + # Validator will throw an error if the directory is empty, which can be suppressed if len(daily_filenames) == 0: - report.add_raised_warning( + report.add_raised_error( ValidationFailure("check_empty_filelist", message="No files found in export directory")) # Check for missing date only happens when files are found From 6bd1666307e105737ea79a642d9c86fc2a320b12 Mon Sep 17 00:00:00 2001 From: Mitch-Delphi Date: Tue, 20 Dec 2022 15:08:08 -0500 Subject: [PATCH 5/8] Static test returned to original form --- _delphi_utils_python/tests/validator/test_static.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/_delphi_utils_python/tests/validator/test_static.py b/_delphi_utils_python/tests/validator/test_static.py index dd8522565..7ce327efc 100644 --- a/_delphi_utils_python/tests/validator/test_static.py +++ b/_delphi_utils_python/tests/validator/test_static.py @@ -27,9 +27,8 @@ def test_empty_filelist(self): filenames = list() validator.check_missing_date_files(filenames, report) - # Empty file list now triggers a raised warning and not an error in validation - assert len(report.raised_warnings) == 1 - assert report.raised_warnings[0].check_name == "check_empty_filelist" + assert len(report.raised_errors) == 1 + assert report.raised_errors[0].check_name == "check_empty_filelist" def test_same_day(self): params = { From 793ea14b99323093092bbb03296819e4cc91181e Mon Sep 17 00:00:00 2001 From: Mitch-Delphi Date: Wed, 4 Jan 2023 09:14:09 -0500 Subject: [PATCH 6/8] fixed indents in static.py --- _delphi_utils_python/delphi_utils/validator/static.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/static.py b/_delphi_utils_python/delphi_utils/validator/static.py index 9946d5c79..d58096d97 100644 --- a/_delphi_utils_python/delphi_utils/validator/static.py +++ b/_delphi_utils_python/delphi_utils/validator/static.py @@ -100,7 +100,7 @@ def check_missing_date_files(self, daily_filenames, report): else: # Create set of all dates seen in CSV names. unique_dates = {datetime.strptime( - daily_filename[0][0:8], '%Y%m%d').date() for daily_filename in daily_filenames} + daily_filename[0][0:8], '%Y%m%d').date() for daily_filename in daily_filenames} # Diff expected and observed dates. expected_dates = self.params.time_window.date_seq if len(self.params.max_expected_lag) == 0: @@ -111,7 +111,7 @@ def check_missing_date_files(self, daily_filenames, report): # Only check for date if it should definitely be present, # i.e if it is more than max_expected_lag since the checking date expected_dates = [date for date in expected_dates if - ((datetime.today().date() - date).days) > max_expected_lag_overall] + ((datetime.today().date() - date).days) > max_expected_lag_overall] check_dateholes = list(set(expected_dates).difference(unique_dates)) check_dateholes.sort() From 13e229d213139d8c3be156c9a63ebf385d712194 Mon Sep 17 00:00:00 2001 From: Mitch-Delphi Date: Tue, 24 Jan 2023 16:47:26 -0500 Subject: [PATCH 7/8] add back in missing file check --- .../tests/validator/test_static.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/_delphi_utils_python/tests/validator/test_static.py b/_delphi_utils_python/tests/validator/test_static.py index 7ce327efc..44817be2e 100644 --- a/_delphi_utils_python/tests/validator/test_static.py +++ b/_delphi_utils_python/tests/validator/test_static.py @@ -21,6 +21,7 @@ def test_empty_filelist(self): } } validator = StaticValidator(params) + #wtf is this report = ValidationReport([]) report = ValidationReport([]) @@ -30,6 +31,26 @@ def test_empty_filelist(self): assert len(report.raised_errors) == 1 assert report.raised_errors[0].check_name == "check_empty_filelist" + def test_missing_date_files(self): + params = { + "common": { + "data_source": "", + "span_length": 5, + "end_date": "2020-09-05", + "max_expected_lag": {"all": "1"} + } + } + validator = StaticValidator(params) + report = ValidationReport([]) + filenames = [("20200901_county_signal_signal.csv", "match_obj"), + ("20200903_county_signal_signal.csv", "match_obj"), + ("20200904_county_signal_signal.csv", "match_obj"), + ("20200905_county_signal_signal.csv", "match_obj")] + validator.check_missing_date_files(filenames, report) + assert len(report.raised_errors) == 1 + assert report.raised_errors[0].check_name == "check_missing_date_files" + + def test_same_day(self): params = { "common": { From 8d5fc6b01887a528543ce6b6c0244079cfdf33ec Mon Sep 17 00:00:00 2001 From: M5Skid <115434969+M5Skid@users.noreply.github.com> Date: Tue, 24 Jan 2023 16:57:02 -0500 Subject: [PATCH 8/8] Update test_static.py took out comment --- _delphi_utils_python/tests/validator/test_static.py | 1 - 1 file changed, 1 deletion(-) diff --git a/_delphi_utils_python/tests/validator/test_static.py b/_delphi_utils_python/tests/validator/test_static.py index 44817be2e..ea3d44b1a 100644 --- a/_delphi_utils_python/tests/validator/test_static.py +++ b/_delphi_utils_python/tests/validator/test_static.py @@ -21,7 +21,6 @@ def test_empty_filelist(self): } } validator = StaticValidator(params) - #wtf is this report = ValidationReport([]) report = ValidationReport([])