Skip to content

Commit 30dd449

Browse files
committed
Edits for avg diff check
1 parent 1284363 commit 30dd449

File tree

1 file changed

+22
-7
lines changed

1 file changed

+22
-7
lines changed

validator/delphi_validator/fbsurveyvalidation.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -142,15 +142,30 @@ def check_avg_val_diffs(recent_df, recent_api_df, smooth_option):
142142

143143
smoothed_thresholds = raw_thresholds.apply(lambda x: x/(math.sqrt(7) * 1.5))
144144

145-
146-
145+
# Reference: the R code below is the original logic that this check is based on
146+
# changesum.by.variable.with.flags = changesum.by.variable %>>%
147+
# dplyr::mutate(mean.stddiff.high = abs(mean.stddiff) > thresholds[["mean.stddiff"]] |
148+
# variable=="val" & abs(mean.stddiff) > thresholds[["val.mean.stddiff"]],
149+
# mean.stdabsdiff.high = mean.stdabsdiff > thresholds[["mean.stdabsdiff"]]) %>>%
150+
151+
switcher = {
152+
'raw': raw_thresholds,
153+
'smoothed': smoothed_thresholds,
154+
}
155+
# Look up the thresholds that correspond to the requested smoothing option
156+
thres = switcher.get(smooth_option, lambda: "Invalid smoothing option")
157+
158+
mean.stddiff.high = mean_stddiff.abs() > thres.loc['mean.stddiff'] or
159+
mean_stddiff.abs() > thres.loc['val.mean.stddiff"']
160+
mean.stdabsdiff.high = mean_stdabsdiff > thres.loc['mean.stdabsdiff']
161+
162+
if mean.stddiff.high or mean.stdabsdiff.high:
163+
print('Average differences in variables by geoid between recent & semirecent data seem \
164+
large --- either large increase tending toward one direction or large mean absolute \
165+
difference, relative to average values of corresponding variables. For the former \
166+
check, tolerances for `val` are more restrictive than those for other columns.'')
147167

148168

149-
# The daterange function is exclusive of the end_date in line with the native python range()
150-
# for check_date in daterange(start_date, end_date):
151-
# print(check_date.strftime("%Y-%m-%d"))
152-
153-
154169
def fbsurvey_validation(daily_filenames, sdate, edate, max_check_lookbehind = timedelta(days=7), sanity_check_rows_per_day = True, sanity_check_value_diffs = True):
155170

156171
meta = covidcast.metadata()

0 commit comments

Comments
 (0)