5
5
from itertools import product
6
6
7
7
import pandas as pd
8
+ import numpy as np
8
9
9
10
from conftest import TEST_DIR
10
11
@@ -64,7 +65,6 @@ def test_output_files_exist(self, run_as_module):
64
65
nf = "_" .join ([date , geo , metric , smther , "search" ]) + ".csv"
65
66
expected_files .append (nf )
66
67
67
- csv_dates = list (set ([datetime .strptime (f .split ('_' )[0 ], "%Y%m%d" ) for f in csv_files [smther ] if smther in f ]))
68
68
assert set (csv_files [smther ]).issuperset (set (expected_files ))
69
69
70
70
@@ -74,3 +74,24 @@ def test_output_file_format(self):
74
74
)
75
75
assert (df .columns .values == [
76
76
"geo_id" , "val" , "se" , "sample_size" ]).all ()
77
+
78
def test_output_files_smoothed(self):
    """Check the smoothed state file against the average of the raw files.

    Reads the smoothed state-level CSV for the last date of the window and
    the raw state-level CSVs for every date of the window from
    ``TEST_DIR/receiving``, then asserts that each smoothed ``val`` is
    close to the per-``geo_id`` sum of the raw ``val`` column divided by
    the number of dates in the window.
    """
    # Seven consecutive dates: 20200804 .. 20200810 (range end is exclusive).
    dates = [str(x) for x in range(20200804, 20200811)]
    receiving_dir = f"{TEST_DIR}/receiving"

    smoothed = pd.read_csv(
        join(receiving_dir, f"{dates[-1]}_state_s01_smoothed_search.csv")
    )

    raw = pd.concat([
        pd.read_csv(join(receiving_dir, f"{date}_state_s01_raw_search.csv"))
        for date in dates
    ])

    # Divide by the window length rather than a literal so the divisor
    # always matches `dates`. A geo_id missing from some raw files still
    # gets divided by the full window length, as in the original check.
    raw = raw.groupby('geo_id')['val'].sum() / len(dates)

    # `raw` is a Series named 'val' indexed by geo_id; merging on 'geo_id'
    # yields 'val_smoothed' and 'val_raw' columns via the suffixes.
    df = pd.merge(smoothed, raw, on='geo_id',
                  suffixes=('_smoothed', '_raw'))

    # The merge is an inner join, and np.allclose on empty arrays is True,
    # so without this guard the test would pass without comparing anything.
    assert not df.empty, "no geo_id shared between smoothed and raw output"
    assert np.allclose(df['val_smoothed'].values, df['val_raw'].values)
0 commit comments