@@ -123,19 +123,27 @@ def check_rapid_change(checking_date, recent_df, recent_api_df, date_list, sig,
123
123
print ("The suspicous spike is for date: " , checking_date , ", signal: " , sig , ", geo_type: " , geo )
124
124
125
125
126
def check_avg_val_diffs(recent_df, recent_api_df, smooth_option):
    """Compare average values in ``recent_df`` against ``recent_api_df``.

    The ``val``, ``se`` and ``sample_size`` columns of both frames are reduced
    to one mean per column.  ``recent_df`` is averaged over all of its rows;
    ``recent_api_df`` is first averaged within each ``geo_value`` and then
    across the geo values, so every geo value carries equal weight in the
    reference means.  Two standardized statistics are derived from those
    per-column means, printed, and returned.

    Args:
        recent_df: DataFrame holding at least the columns ``val``, ``se``
            and ``sample_size``.
        recent_api_df: DataFrame holding ``geo_value`` plus the same three
            value columns.
        smooth_option: Leading token of the signal name.  The value
            ``"smoothed"`` selects the scaled-down thresholds; any other
            value selects the raw thresholds.
            NOTE(review): assumes smoothed signals are named
            ``smoothed_*`` -- confirm against the signal list.

    Returns:
        dict with the keys:
            ``"mean_stddiff"``: signed standardized difference,
            ``"mean_stdabsdiff"``: absolute standardized difference,
            ``"thresholds"``: ``pandas.Series`` of the thresholds in effect,
            indexed by ``'mean.stddiff'``, ``'val.mean.stddiff'`` and
            ``'mean.stdabsdiff'``.

    Raises:
        KeyError: if a required column is missing from either frame.
    """
    value_cols = ['val', 'se', 'sample_size']

    print("recent_df dtypes", recent_df.dtypes)

    # Selecting the value columns explicitly keeps identifier/date columns
    # out of the mean, so no separate drop of 'geo_id' is required.
    mean_recent_df = recent_df[value_cols].mean()

    # Per-geo means first, then the mean of those per-geo means.
    mean_recent_api_df = (
        recent_api_df.groupby('geo_value')[value_cols].mean().mean()
    )

    diff = mean_recent_df - mean_recent_api_df
    # Shared by both statistics.  A zero denominator yields inf/nan rather
    # than raising, because the operands are NumPy floats.
    denominator = mean_recent_df.mean() + mean_recent_api_df.mean()

    # mean.stddiff    = mean(recent - reference) * 2
    #                   / (mean(recent) + mean(reference))
    # mean.stdabsdiff = mean(|recent - reference|) * 2
    #                   / (mean(recent) + mean(reference))
    mean_stddiff = (diff.mean() * 2) / denominator
    mean_stdabsdiff = (diff.abs().mean() * 2) / denominator
    print("mean_stddiff", mean_stddiff)
    print("mean_stdabsdiff", mean_stdabsdiff)

    raw_thresholds = pd.Series({
        'mean.stddiff': 0.50,
        'val.mean.stddiff': 0.30,
        'mean.stdabsdiff': 0.80,
    })
    if smooth_option == "smoothed":
        # Smoothed signals get tighter thresholds: raw / (sqrt(7) * 1.5).
        # NOTE(review): the 7 presumably reflects a 7-day smoothing window
        # -- confirm.
        thresholds = raw_thresholds / (math.sqrt(7) * 1.5)
    else:
        thresholds = raw_thresholds

    return {
        "mean_stddiff": mean_stddiff,
        "mean_stdabsdiff": mean_stdabsdiff,
        "thresholds": thresholds,
    }
144
+
145
+
146
+
139
147
140
148
141
149
# The daterange function is exclusive of the end_date in line with the native python range()
@@ -192,10 +200,14 @@ def fbsurvey_validation(daily_filenames, sdate, edate, max_check_lookbehind = ti
192
200
## in time, how many days -- before subtracting out the "recent" days ---
193
201
## do we use to form the reference statistics?
194
202
semirecent_lookbehind = timedelta (days = 7 )
203
+ smooth_option_regex = re .compile (r'([^_]+)' )
195
204
196
205
kroc = 0
197
206
for recent_df , geo , sig in read_geo_sig_cmbo_files (geo_sig_cmbo , data_folder , filenames , date_slist ):
198
207
208
+ m = smooth_option_regex .match (sig )
209
+ smooth_option = m .group (1 )
210
+
199
211
#recent_df.set_index("time_value", inplace = True)
200
212
print ("Printing recent_df scenes:" , recent_df .shape )
201
213
print (recent_df )
@@ -219,7 +231,7 @@ def fbsurvey_validation(daily_filenames, sdate, edate, max_check_lookbehind = ti
219
231
# check_rapid_change(checking_date, recent_df, recent_api_df, date_list, sig, geo)
220
232
221
233
if sanity_check_value_diffs :
222
- check_avg_val_diffs (recent_df , recent_api_df )
234
+ check_avg_val_diffs (recent_df , recent_api_df , smooth_option )
223
235
kroc += 1
224
236
if kroc == 2 :
225
237
break
0 commit comments