Skip to content

Commit 1098180

Browse files
committed
Update check_missing_values
Moved check_val_missing to the dynamic checks. Increased granularity: it now reports each geo_id + date combination that has a missing value. Ignores the first 6 days of the checking period, since 7dav signals return NAs there as part of smoothing.
1 parent 0b7103a commit 1098180

File tree

2 files changed

+36
-8
lines changed

2 files changed

+36
-8
lines changed

_delphi_utils_python/delphi_utils/validator/dynamic.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ def validate(self, all_frames, report):
110110
self.check_max_allowed_max_date(
111111
max_date, geo_type, signal_type, report)
112112

113+
self.check_na_vals(geo_sig_df, geo_type, signal_type, report)
114+
113115
# Get relevant reference data from API dictionary.
114116
api_df_or_error = all_api_df[(geo_type, signal_type)]
115117

@@ -168,6 +170,40 @@ def validate(self, all_frames, report):
168170
if self.test_mode and kroc == 2:
169171
break
170172

173+
def check_na_vals(self, geo_sig_df, geo_type, signal_type, report):
    """Check if there are any NA values.

    One validation failure is raised for every (geo_id, time_value) row whose
    `val` is NA. The first six observations of each geo_id, taken in
    chronological order, are exempt because 7-day-average signals return NAs
    there as part of smoothing. The exemption is applied per geo_id, which
    makes sure that the error doesn't occur for new geo IDs introduced.

    Arguments:
        - geo_sig_df: pandas dataframe for a single geo type and signal; must
          contain the `geo_id`, `time_value` and `val` columns
        - geo_type: str; geo type name (county, msa, hrr, state) as in the CSV name
        - signal_type: str; signal name as in the CSV name
        - report: ValidationReport; report where results are added

    Returns:
        - None
    """
    # Leading observations per geo_id that are allowed to be NA.
    smoothing_warmup_obs = 6

    # Sort so that "first six" really means the six earliest dates of each
    # geo_id, regardless of the row order the caller supplies. Sorting on
    # geo_id first keeps failures reported geo by geo, oldest date first.
    ordered = geo_sig_df.sort_values(["geo_id", "time_value"])

    # 0-based position of every row inside its geo_id group. A vectorized
    # mask avoids groupby.apply, whose result shape depends on the number of
    # groups, and Series.iteritems, which newer pandas releases removed.
    position_in_geo = ordered.groupby("geo_id").cumcount()
    unexpected_na = ordered["val"].isnull() & (position_in_geo >= smoothing_warmup_obs)
    missing = ordered.loc[unexpected_na, ["geo_id", "time_value"]]

    for geo_id, time_value in zip(missing["geo_id"], missing["time_value"]):
        report.add_raised_error(
            ValidationFailure(f"check_val_missing (geo_id {geo_id})",
                              geo_type=geo_type,
                              signal=signal_type,
                              date=time_value
                              )
        )

    report.increment_total_checks()
206+
171207
def check_min_allowed_max_date(self, max_date, geo_type, signal_type, report):
172208
"""Check if time since data was generated is reasonable or too long ago.
173209

_delphi_utils_python/delphi_utils/validator/static.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -295,14 +295,6 @@ def check_bad_val(self, df_to_test, nameformat, signal_type, report):
295295

296296
report.increment_total_checks()
297297

298-
if df_to_test['val'].isnull().values.any():
299-
report.add_raised_error(
300-
ValidationFailure("check_val_missing",
301-
filename=nameformat,
302-
message="val column can't have any cell that is NA"))
303-
304-
report.increment_total_checks()
305-
306298
if not df_to_test[(df_to_test['val'] < 0)].empty:
307299
report.add_raised_error(
308300
ValidationFailure("check_val_lt_0",

0 commit comments

Comments
 (0)