cmu-delphi · krivard · Jan 31, 2022 · Dec 10, 2021 · Dec 10, 2021 · Jan 19, 2022
diff --git a/_delphi_utils_python/delphi_utils/validator/dynamic.py b/_delphi_utils_python/delphi_utils/validator/dynamic.py
@@ -110,6 +110,8 @@ def validate(self, all_frames, report):
             self.check_max_allowed_max_date(
                 max_date, geo_type, signal_type, report)
 
+            self.check_na_vals(geo_sig_df, geo_type, signal_type, report)
+
             # Get relevant reference data from API dictionary.
             api_df_or_error = all_api_df[(geo_type, signal_type)]
 
@@ -168,6 +170,40 @@ def validate(self, all_frames, report):
             if self.test_mode and kroc == 2:
                 break
 
+    def check_na_vals(self, geo_sig_df, geo_type, signal_type, report):
+        """Check for NA values in `val` past each geo ID's first 6 rows.
+
+        The first 6 rows of every geo ID are exempt, so that geo IDs newly
+        introduced into the data do not raise errors. One error is raised per
+        remaining NA row. NOTE(review): "first" is by row order; this assumes
+        each geo's rows arrive ordered by time_value -- confirm with caller.
+
+        Arguments:
+            - geo_sig_df: pandas dataframe with geo_id, time_value, val columns
+            - geo_type: str; geo type name (county, msa, hrr, state) as in the CSV name
+            - signal_type: str; signal name as in the CSV name
+            - report: ValidationReport; report where results are added
+
+        Returns:
+            - None
+        """
+        # cumcount gives each row's position within its geo_id group. Unlike
+        # groupby().apply(), its result has one shape however many geo IDs
+        # exist; apply() yields a DataFrame when only one geo ID is present.
+        past_grace_period = geo_sig_df.groupby("geo_id").cumcount() >= 6
+        is_missing = geo_sig_df["val"].isnull() & past_grace_period
+        # A stable sort groups errors by geo_id and keeps row order within each.
+        missing = geo_sig_df[is_missing].sort_values("geo_id", kind="mergesort")
+
+        for geo_id, date in zip(missing["geo_id"], missing["time_value"]):
+            report.add_raised_error(
+                ValidationFailure(f"check_val_missing (geo_id {geo_id})",
+                                  geo_type=geo_type,
+                                  signal=signal_type,
+                                  date=date))
+
+        report.increment_total_checks()
+
     def check_min_allowed_max_date(self, max_date, geo_type, signal_type, report):
         """Check if time since data was generated is reasonable or too long ago.
 

diff --git a/_delphi_utils_python/delphi_utils/validator/static.py b/_delphi_utils_python/delphi_utils/validator/static.py
@@ -295,14 +295,6 @@ def check_bad_val(self, df_to_test, nameformat, signal_type, report):
 
             report.increment_total_checks()
 
-        if df_to_test['val'].isnull().values.any():
-            report.add_raised_error(
-                ValidationFailure("check_val_missing",
-                                  filename=nameformat,
-                                  message="val column can't have any cell that is NA"))
-
-        report.increment_total_checks()
-
         if not df_to_test[(df_to_test['val'] < 0)].empty:
             report.add_raised_error(
                 ValidationFailure("check_val_lt_0",

diff --git a/_delphi_utils_python/tests/validator/test_dynamic.py b/_delphi_utils_python/tests/validator/test_dynamic.py
@@ -105,7 +105,25 @@ def test_0_vs_many(self):
 
         assert len(report.raised_errors) == 1
         assert report.raised_errors[0].check_name == "check_rapid_change_num_rows"
-
+class TestCheckNaVals:
+    """Tests for DynamicValidator.check_na_vals."""
+    params = {
+        "common": {
+            "data_source": "",
+            "span_length": 1,
+            "end_date": "2020-09-02"
+        }
+    }
+
+    def test_missing(self):
+        """Two geos with 7 all-NA rows each: one error per geo is expected."""
+        validator = DynamicValidator(self.params)
+        report = ValidationReport([])
+        df = pd.DataFrame({"val": [np.nan] * 14, "geo_id": [0, 1] * 7,
+                           "time_value": ["2021-09-01"] * 14})
+        validator.check_na_vals(df, "geo", "signal", report)
+        assert [error.check_name for error in report.raised_errors] == [
+            "check_val_missing (geo_id 0)", "check_val_missing (geo_id 1)"]
 
 class TestCheckAvgValDiffs:
     params = {

diff --git a/_delphi_utils_python/tests/validator/test_static.py b/_delphi_utils_python/tests/validator/test_static.py
@@ -362,15 +362,6 @@ def test_empty_df(self):
 
         assert len(report.raised_errors) == 0
 
-    def test_missing(self):
-        validator = StaticValidator(self.params)
-        report = ValidationReport([])
-        df = pd.DataFrame([np.nan], columns=["val"])
-        validator.check_bad_val(df, FILENAME, "signal", report)
-
-        assert len(report.raised_errors) == 1
-        assert report.raised_errors[0].check_name == "check_val_missing"
-
     def test_lt_0(self):
         validator = StaticValidator(self.params)
         report = ValidationReport([])