Commit f855d94

Ananya Joshi authored and committed

basic testing and linting complete
1 parent 6723691 commit f855d94

2 files changed: +21 -21 lines changed

_delphi_utils_python/delphi_utils/flash_eval/README.md

Lines changed: 3 additions & 2 deletions

@@ -46,7 +46,8 @@ the flagging system , as follows:
 
 ```
 env/bin/python -m delphi_INDICATORNAME
-env/bin/python -m delphi_utils.flagging
+env/bin/python -m delphi_utils.flash_eval
+
 ```
 
 Once you are finished with the code, you can deactivate the virtual environment
@@ -91,7 +92,7 @@ make test
 or
 
 ```
-(cd tests && ../env/bin/pytest test_file.py --cov=delphi_utils.flagging --cov-report=term-missing)
+(cd tests && ../env/bin/pytest test_file.py --cov=delphi_utils --cov-report=term-missing)
 ```
 
 The output will show the number of unit tests that passed and failed, along with the percentage of code covered by the tests. None of the tests should fail and the code lines that are not covered by unit tests should be small and should not include critical sub-routines.

_delphi_utils_python/delphi_utils/flash_eval/eval_day.py

Lines changed: 18 additions & 19 deletions

@@ -1,5 +1,5 @@
 "Functions pretaining to running FlaSH daily."
-import time
+#import time
 import math
 import numpy as np
 import covidcast
@@ -41,7 +41,6 @@ def outlier(df, iqr_list=None, replace=pd.DataFrame(), replace_use=False):
     df['day'] = [x.weekday() for x in list(df.index)]
     diff_df2 = diff_df_small
     diff_df2['day'] = df['day']
-
    diff_df2_stack = diff_df2.drop(columns=['day']).stack().reset_index()
    diff_df2_stack.columns = ['date', 'state', 'val']
    diff_df2_stack['weekday'] = diff_df2_stack.date.dt.weekday
@@ -54,7 +53,7 @@ def outlier(df, iqr_list=None, replace=pd.DataFrame(), replace_use=False):
    iqr_spec_df2 = diff_df2_stack.iloc[1:, :]
    for _, (_, ldf) in enumerate(iqr_spec_df2.groupby(['weekday'])):
        iqr = ldf.groupby('state').apply(lambda x: x.val.quantile([lower, 0.5, upper]).T)
-       iqr = fix_iqr(iqr)
+       iqr = iqr.apply(lambda x: fix_iqr(x), axis=1)
        iqr['delta'] = 1.5 * (np.ceil(iqr[upper]) - np.floor(iqr[lower]))
        iqr['lower_bound'] = iqr[lower] - iqr['delta']
        iqr['upper_bound'] = iqr[upper] + iqr['delta']
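
Note on the hunk above: the commit swaps a single whole-DataFrame call to `fix_iqr` for a row-wise apply. A minimal sketch of that pandas pattern, using toy quantiles and a hypothetical stand-in for the project's `fix_iqr` (the real one is defined elsewhere in eval_day.py):

```
import pandas as pd

# Toy quantile table: one row per state, one column per quantile level.
iqr = pd.DataFrame({0.05: [3.0, 1.0], 0.50: [2.0, 2.0], 0.95: [5.0, 4.0]},
                   index=['ak', 'al'])

def fix_iqr(q):
    # Hypothetical stand-in: force one row's quantiles to be non-decreasing.
    return q.sort_index().cummax()

# axis=1 hands fix_iqr one state's quantiles (a Series) at a time,
# rather than the whole DataFrame in a single call.
iqr = iqr.apply(lambda x: fix_iqr(x), axis=1)
print(iqr)
```
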
@@ -113,7 +112,7 @@ def spike(x):
        window_size = 7
        shift_val = -1 if window_size % 2 == 0 else 0
        group = x.to_frame()
-       group.columns = ["value"]
+       group.columns = ["value"]
        rolling_windows = group["value"].rolling(
            window_size, min_periods=window_size)
        center_windows = group["value"].rolling(
@@ -127,7 +126,7 @@ def spike(x):
        group['state'] = x.name
        group_list.append(group)
 
-    spike(all_frames_orig.T)
+    all_frames_orig.apply(lambda x: spike(x), axis=0).to_list()
    all_frames = pd.concat(group_list)
    outlier_df = all_frames.reset_index().sort_values(by=['state', 'ref']) \
        .reset_index(drop=True).copy()
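
Here `spike` is now driven through a column-wise apply instead of a single call on the transposed frame. A minimal sketch of how `DataFrame.apply(..., axis=0)` feeds each column to the function as a Series whose `.name` is the column label, with a simplified stand-in for the nested `spike`:

```
import pandas as pd

# Toy wide frame: one column per state, one row per reference date.
all_frames_orig = pd.DataFrame({'ak': [1.0, 2.0, 9.0], 'al': [3.0, 3.0, 4.0]},
                               index=pd.date_range('2022-01-01', periods=3))
group_list = []

def spike(x):
    # Simplified stand-in: x is one column as a Series, so x.name is the
    # state label -- which is how the real spike() tags each group.
    group = x.to_frame()
    group.columns = ['value']
    group['state'] = x.name
    group_list.append(group)

# axis=0 calls spike once per column; results are gathered through the
# group_list side effect, as in the commit.
all_frames_orig.apply(lambda x: spike(x), axis=0)
print(pd.concat(group_list))
```
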
@@ -197,7 +196,7 @@ def predict_val(col, params_state, lags_names):
                            lags_names=lags_names, axis=0).T.clip(0)
    y_predict.columns = ['y_predict']
    y_values_df = y_values_df.merge(y_predict, left_index=True,
-                                   right_index=True, how='outer').droplevel(level=0)
+                                   right_index=True, how='outer')#.droplevel(level=0)
    weekday_outlier_flags['flag'] = 'weekday outlier'
    large_spike_flags['flag'] = 'large_spikes'
    flags_returned = pd.concat([weekday_outlier_flags,
@@ -215,8 +214,8 @@ def return_vals(val, ref_dist):
    pval.name = 'pval'
    for state in dist.index:
        pval[state] = (sum(ref_dist.astype(float) < float(dist[state])) / len(ref_dist))
-   val = val.merge(dist, left_on='state', right_index=True, how='outer')
-   val = val.merge(pval, left_on='state', right_index=True, how='outer')
+   val = val.merge(dist, left_index=True, right_index=True, how='outer')
+   val = val.merge(pval, left_index=True, right_index=True, how='outer')
    return val
 
 def process_anomalies(y, t_skew=None):
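
The `return_vals` merges now join on the index on both sides rather than a `state` column on the left, which appears to assume `val` arrives indexed by state. A toy sketch of the difference:

```
import pandas as pd

# Toy inputs: val is indexed by state; dist is a state-indexed, named Series.
val = pd.DataFrame({'y_raw': [10.0, 20.0]}, index=['ak', 'al'])
dist = pd.Series([0.4, 0.9], index=['ak', 'al'], name='dist')

# Before: val.merge(dist, left_on='state', right_index=True, ...) needed a
# 'state' column in val. After: both sides join on their state-labeled indexes.
val = val.merge(dist, left_index=True, right_index=True, how='outer')
print(val)
```
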
@@ -322,17 +321,17 @@ def flash_eval_lag(input_df, range_tup, lag, signal, logger):
        if iter_df.shape[0] > 0 :
            for _, row in iter_df.reset_index().iterrows():
                total_flags += 1
-
-               start_link = f"{starter_link},{row.state}"
+               start_link = f"{starter_link},{row['index']}"
                if 'pval' in iter_df.columns :
-                   p_text += f"\t{start_link}|*{row.state}, {row.pval}*>\n"
+                   p_text += f"\t{start_link}|*{row['index']}, {row.pval}*>\n"
                elif 'y_raw' in iter_df.columns :
-                   p_text += f"\t{start_link}|*{row.state}, {row.y_raw}*>\n"
+                   p_text += f"\t{start_link}|*{row['index']}, {row.y_raw}*>\n"
            logger.info(name,
                        payload=p_text,
                        hits=iter_df.shape[0])
            p_text = ""
 
+
 def flash_eval(params):
     """ Evaluate most recent data using FlaSH.
     First, get any necessary files from the cache or download from the API.
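
The `row.state` → `row['index']` change follows from `iter_df.reset_index()`: when the index being reset has no name, pandas materializes it as a column literally called `index`. A toy sketch:

```
import pandas as pd

# Toy flag table keyed by an unnamed state index, as flash_eval_lag now sees it.
iter_df = pd.DataFrame({'pval': [0.01, 0.99]}, index=['ak', 'al'])

for _, row in iter_df.reset_index().iterrows():
    # The unnamed index becomes a column named 'index', so the state label
    # is row['index']; row.state would raise an AttributeError here.
    print(row['index'], row['pval'])
```
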
@@ -343,7 +342,6 @@ def flash_eval(params):
     Ouput: None
     """
 
-
     logger = get_structured_logger(
         __name__, filename=params["common"].get("log_filename"),
         log_exceptions=params["common"].get("log_exceptions", True))
@@ -358,7 +356,7 @@
     signals = params["flash"]["signals"]
     for signal in signals:
         curr_df = pd.DataFrame()
-        start_time = time.time()
+        #start_time = time.time()
         for date_s in pd.date_range(most_recent_d-pd.Timedelta('14d'),
                                     most_recent_d-pd.Timedelta('1d')):
             data = covidcast.signal(source, signal,
@@ -395,12 +393,12 @@
             data = data.set_index(['state', 'lag', 'ref', 'as_of'])
             curr_df = pd.concat([data, curr_df])
         curr_df = curr_df[~curr_df.index.duplicated(keep='first')].reset_index()
-        end_time = time.time()
-        print(f"Total Download Time: {start_time-end_time}")
+        #end_time = time.time()
+        #print(f"Total Download Time: {start_time-end_time}")
 
 
         for lag in range(1,8):
-            start_time = time.time()
+            #start_time = time.time()
             date_range = list(pd.date_range(most_recent_d-pd.Timedelta(f'{lag+7}d'),
                                             most_recent_d-pd.Timedelta(f'{lag}d')))
             input_df = curr_df.query('lag==@lag and ref in @date_range').sort_values('ref')
@@ -411,6 +409,7 @@
             input_df = input_df.merge(date_df, left_index=True, right_index=True,
                                       how='right').ffill().bfill().reset_index()
             input_df = input_df.set_index(['ref', 'state'])[['value']].unstack().ffill().bfill()
+            input_df.columns = input_df.columns.droplevel()
             flash_eval_lag(input_df, [0, math.inf], lag, signal, logger)
-            end_time = time.time()
-            print(f"Time lag {lag}: {start_time - end_time}")
+            #end_time = time.time()
+            #print(f"Time lag {lag}: {start_time - end_time}")
