Skip to content

Commit 1284363

Browse files
committed
Adding smoothing thresholds
1 parent 5e60926 commit 1284363

File tree

1 file changed

+16
-4
lines changed

1 file changed

+16
-4
lines changed

validator/delphi_validator/fbsurveyvalidation.py

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -123,19 +123,27 @@ def check_rapid_change(checking_date, recent_df, recent_api_df, date_list, sig,
123123
print("The suspicous spike is for date: ", checking_date, ", signal: ", sig, ", geo_type: ", geo)
124124

125125

126-
def check_avg_val_diffs(recent_df, recent_api_df):
126+
def check_avg_val_diffs(recent_df, recent_api_df, smooth_option):
127127
print("recent_df dtypes", recent_df.dtypes)
128128
recent_df = recent_df.drop(columns=['geo_id'])
129-
mean_recent_df = recent_df.mean()
129+
mean_recent_df = recent_df[['val', 'se', 'sample_size']].mean()
130130
recent_api_df = recent_api_df.groupby(['geo_value'], as_index=False)[['val', 'se', 'sample_size']].mean()
131131
recent_api_df = recent_api_df.drop(columns=['geo_value'])
132+
132133
mean_recent_api_df = recent_api_df.mean()
133134

134-
#mean.stddiff = (mean(recent-semirecent)*2/(mean(recent)+mean(semirecent)))
135135
mean_stddiff = ((mean_recent_df - mean_recent_api_df).mean() * 2) / (mean_recent_df.mean() + mean_recent_api_df.mean())
136136
mean_stdabsdiff = ((mean_recent_df - mean_recent_api_df).abs().mean() * 2) / (mean_recent_df.mean() + mean_recent_api_df.mean())
137137
print("mean_stddiff", mean_stddiff)
138138
print("mean_stdabsdiff", mean_stdabsdiff)
139+
140+
classes = ['mean.stddiff', 'val.mean.stddiff', 'mean.stdabsdiff']
141+
raw_thresholds = pd.DataFrame([0.50, 0.30, 0.80], classes)
142+
143+
smoothed_thresholds = raw_thresholds.apply(lambda x: x/(math.sqrt(7) * 1.5))
144+
145+
146+
139147

140148

141149
# The daterange function is exclusive of the end_date in line with the native python range()
@@ -192,10 +200,14 @@ def fbsurvey_validation(daily_filenames, sdate, edate, max_check_lookbehind = ti
192200
## in time, how many days -- before subtracting out the "recent" days ---
193201
## do we use to form the reference statistics?
194202
semirecent_lookbehind = timedelta(days=7)
203+
smooth_option_regex = re.compile(r'([^_]+)')
195204

196205
kroc = 0
197206
for recent_df, geo, sig in read_geo_sig_cmbo_files(geo_sig_cmbo, data_folder, filenames, date_slist):
198207

208+
m = smooth_option_regex.match(sig)
209+
smooth_option = m.group(1)
210+
199211
#recent_df.set_index("time_value", inplace = True)
200212
print("Printing recent_df scenes:", recent_df.shape)
201213
print(recent_df)
@@ -219,7 +231,7 @@ def fbsurvey_validation(daily_filenames, sdate, edate, max_check_lookbehind = ti
219231
# check_rapid_change(checking_date, recent_df, recent_api_df, date_list, sig, geo)
220232

221233
if sanity_check_value_diffs:
222-
check_avg_val_diffs(recent_df, recent_api_df)
234+
check_avg_val_diffs(recent_df, recent_api_df, smooth_option)
223235
kroc += 1
224236
if kroc == 2:
225237
break

0 commit comments

Comments
 (0)