Merge pull request #245 from cmu-delphi/jhu_fix_0824

krivard · web-flow · commit c012908b2bc6 · 2020-08-28T08:48:49.000-04:00
fixed geo test
diff --git a/jhu/.gitignore b/jhu/.gitignore
@@ -5,6 +5,7 @@ params.json
 
 # Do not commit output files
 receiving/*.csv
+tests/receiving/*.csv
 
 # Remove macOS files
 .DS_Store
diff --git a/jhu/delphi_jhu/run.py b/jhu/delphi_jhu/run.py
@@ -93,11 +93,13 @@ def run_module():
         df = dfs[metric]
         # Aggregate to appropriate geographic resolution
         df = geo_map(df, geo_res)
-        df["val"] = SMOOTHERS_MAP[smoother][0](df[sensor].values)
+        df.set_index(["timestamp", "geo_id"], inplace=True)
+        df["val"] = df[sensor].groupby(level=1).transform(SMOOTHERS_MAP[smoother][0])
         df["se"] = np.nan
         df["sample_size"] = np.nan
         # Drop early entries where data insufficient for smoothing
-        df = df.loc[~df["val"].isnull(), :]
+        df = df[~df["val"].isnull()]
+        df = df.reset_index()
         sensor_name = SENSOR_NAME_MAP[sensor][0]
         # if (SENSOR_NAME_MAP[sensor][1] or SMOOTHERS_MAP[smoother][2]):
         #     metric = f"wip_{metric}"
diff --git a/jhu/tests/receiving/.gitkeep b/jhu/tests/receiving/.gitkeep
diff --git a/jhu/tests/test_geo.py b/jhu/tests/test_geo.py
@@ -20,7 +20,7 @@ def test_incorrect_geo(self):
         )
 
         with pytest.raises(ValueError):
-            geo_map(df, "département", 'new_counts')
+            geo_map(df, "département")
 
     def test_county(self):
         df = pd.DataFrame(
@@ -45,7 +45,7 @@ def test_county(self):
 
         df = df.append(df_mega)
 
-        new_df = geo_map(df, "county", 'new_counts')
+        new_df = geo_map(df, "county")
 
         exp_incidence = df["new_counts"] / df["population"] * 100000
         exp_cprop = df["cumulative_counts"] / df["population"] * 100000
@@ -78,7 +78,7 @@ def test_state(self):
 
         df = df.append(df_mega)
 
-        new_df = geo_map(df, "state", 'new_counts')
+        new_df = geo_map(df, "state")
 
         exp_incidence = np.array([27 + 5, 13 + 10]) / np.array([2500, 25]) * 100000
         exp_cprop = np.array([165 + 30, 60 + 100]) / np.array([2500, 25]) * 100000
@@ -114,7 +114,7 @@ def test_hrr(self):
 
         # df = df.append(df_mega)
 
-        new_df = geo_map(df, "hrr", 'new_counts')
+        new_df = geo_map(df, "hrr")
 
         exp_incidence = np.array([13, 27]) / np.array([25, 2500]) * 100000
         exp_cprop = np.array([60, 165]) / np.array([25, 2500]) * 100000
@@ -145,7 +145,7 @@ def test_msa(self):
 
         # df = df.append(df_mega)
 
-        new_df = geo_map(df, "msa", 'new_counts')
+        new_df = geo_map(df, "msa")
 
         assert new_df["geo_id"].isin([31420, 49340]).all()
         assert new_df["timestamp"].isin(["2020-02-15"]).all()
diff --git a/jhu/tests/test_smooth.py b/jhu/tests/test_smooth.py
@@ -10,21 +10,25 @@
 class TestSmooth:
     def test_output_files_smoothed(self, run_as_module):
 
-        dates = [str(x) for x in range(20200701, 20200730)]
+        dates = [str(x) for x in range(20200303, 20200310)]
 
         smoothed = pd.read_csv(
-            join("../receiving",
+            join("./receiving",
                 f"{dates[-1]}_state_confirmed_7dav_cumulative_num.csv")
         )
 
+        # Build a dataframe out of the individual day files
         raw = pd.concat([
             pd.read_csv(
-                join("../receiving",
+                join("./receiving",
                     f"{date}_state_confirmed_cumulative_num.csv")
             ) for date in dates
         ])
-
+        # Compute the mean across the time values (order doesn't matter);
+        # this corresponds to the smoothed value on the last day in `dates`,
+        # 2020-03-09
         raw = raw.groupby('geo_id')['val'].mean()
-        df = pd.merge(smoothed, raw, on='geo_id', suffixes=('_smoothed', '_raw'))
         
+        df = pd.merge(smoothed, raw, on='geo_id', suffixes=('_smoothed', '_raw'))
         assert np.allclose(df['val_smoothed'].values, df['val_raw'].values)
+