Skip to content

Commit fe80833

Browse files
authored
Merge pull request #538 from cmu-delphi/all-deploys
Sync all deploy branches into main in prep for generating prod branch
2 parents 6a7fd65 + 017d96e commit fe80833

File tree

9 files changed

+163
-111
lines changed

9 files changed

+163
-111
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"static_file_dir": "./static",
3+
"export_dir": "/common/covidcast/receiving/quidel",
4+
"cache_dir": "./cache",
5+
"export_start_date": "2020-05-26",
6+
"export_end_date": "",
7+
"pull_start_date": "2020-05-26",
8+
"pull_end_date": "",
9+
"mail_server": "{{ quidel_datadrop_mail_server }}",
10+
"account": "{{ quidel_datadrop_account }}",
11+
"password": "{{ quidel_datadrop_password }}",
12+
"sender": "{{ quidel_datadrop_sender }}",
13+
"wip_signal": [""],
14+
"mode": ""
15+
}

jenkins/quidel-jenkins-build.sh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Jenkins build
4+
#
5+
6+
set -eo pipefail
7+
source ~/.bash_profile
8+
9+
#
10+
# Build
11+
#
12+
13+
local_indicator="quidel_covidtest"
14+
15+
cd "${WORKSPACE}/${local_indicator}" || exit
16+
17+
# Set up venv
18+
python -m venv env
19+
source env/bin/activate
20+
pip install ../_delphi_utils_python/.
21+
pip install .

jenkins/quidel-jenkins-deploy.sh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Jenkins deploy
4+
#
5+
6+
set -eo pipefail
7+
source ~/.bash_profile
8+
9+
#
10+
# Deploy
11+
#
12+
13+
local_indicator="quidel_covidtest"
14+
15+
cd "${WORKSPACE}/ansible" || exit
16+
17+
# Ansible!
18+
ansible-playbook ansible-deploy.yaml --extra-vars "indicator=${local_indicator}" -i inventory

jenkins/quidel-jenkins-package.sh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Jenkins package
4+
#
5+
6+
set -eo pipefail
7+
source ~/.bash_profile
8+
9+
#
10+
# Package
11+
#
12+
13+
local_indicator="quidel_covidtest"
14+
15+
cd "${WORKSPACE}" || exit
16+
17+
# Create .tar.gz for deployment
18+
tar -czvf "${JENKINS_HOME}/artifacts/${local_indicator}.tar.gz" "${local_indicator}"

jenkins/quidel-jenkins-test.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Jenkins test
4+
#
5+
6+
set -eo pipefail
7+
source ~/.bash_profile
8+
9+
#
10+
# Test
11+
#
12+
13+
local_indicator="quidel_covidtest"
14+
15+
cd "${WORKSPACE}/${local_indicator}" || exit
16+
17+
# Linter
18+
env/bin/pylint delphi_"${local_indicator}"
19+
20+
# Unit tests and code coverage
21+
cd tests || exit && \
22+
../env/bin/pytest --cov=delphi_"${local_indicator}" --cov-report=term-missing

quidel_covidtest/.pylintrc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,5 @@ attr-rgx=[a-z_][a-z0-9_]*
1919
[DESIGN]
2020

2121
# Don't complain about pytest "unused" arguments.
22-
ignored-argument-names=(_.*|run_as_module)
22+
ignored-argument-names=(_.*|run_as_module)
23+
disable=R0801, C0330, E1101, E0611, C0114, C0116, C0103, R0913, R0914, W0702, W0212, E1136
Lines changed: 44 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,77 +1,49 @@
11
"""Contains geographic mapping tools."""
2-
3-
def zip_to_msa(data, map_df):
4-
"""Map from zipcode to MSA (along with parent state).
5-
Args:
6-
data: dataframe at the day-zip resolution.
7-
Returns:
8-
tuple, a dataframe at day-msa, with parent state column, and their string keys
2+
from delphi_utils import GeoMapper
3+
4+
date_col = "timestamp"
5+
data_cols = ['totalTest', 'numUniqueDevices', 'positiveTest', "population"]
6+
gmpr = GeoMapper() # Use geo utils
7+
GEO_KEY_DICT = {
8+
"county": "fips",
9+
"msa": "msa",
10+
"hrr": "hrr",
11+
"state": "state_id"
12+
}
13+
def geo_map(geo_res, df):
14+
data = df.copy()
15+
geo_key = GEO_KEY_DICT[geo_res]
16+
# Add population for each zipcode
17+
data = gmpr.add_population_column(data, "zip")
18+
# zip -> geo_res
19+
data = gmpr.replace_geocode(data, "zip", geo_key,
20+
date_col=date_col, data_cols=data_cols)
21+
if geo_res == "state":
22+
return data
23+
# Add parent state
24+
data = add_parent_state(data, geo_res, geo_key)
25+
return data, geo_key
26+
27+
def add_parent_state(data, geo_res, geo_key):
928
"""
10-
# zip -> msa
11-
zip_map = map_df[["zip", "cbsa_id"]].dropna().drop_duplicates()
12-
# forget about the rest of the zips that aren't in MSA
13-
data = data.merge(zip_map, how="left", on="zip").dropna().drop(columns=["zip"], axis=1)
14-
15-
# msa + parent state
16-
# msa_map has mapping from msa to state, going by the state with the largest
17-
# population (since a msa may span multiple states)
18-
msa_map = map_df[["cbsa_id", "state_id", "population"]]
19-
msa_map = msa_map.groupby(["cbsa_id"]).max().reset_index()
20-
data = data.merge(msa_map, how="left", on="cbsa_id").drop(
21-
columns=["population"]).dropna()
22-
data = data.groupby(["timestamp", "cbsa_id", "state_id"]).sum().reset_index()
23-
data["cbsa_id"] = data["cbsa_id"].apply(lambda x: str(int(x)).zfill(5))
24-
25-
return data, "cbsa_id"
26-
27-
def zip_to_hrr(data, map_df):
28-
"""Map from zipcode to HRR (along with parent state).
29-
Args:
30-
data: dataframe at the day-zip resolution.
31-
Returns:
32-
tuple, a dataframe at day-msa, with parent state column, and their string keys
29+
- map from msa/hrr to state, going by the state with the largest
30+
population (since a msa/hrr may span multiple states)
31+
- map from county to the corresponding state
3332
"""
34-
# zip -> msa
35-
zip_map = map_df[["zip", "hrrnum"]].dropna().drop_duplicates()
36-
# forget about the rest of the zips that aren't in MSA
37-
data = data.merge(zip_map, how="left", on="zip").dropna().drop(columns=["zip"], axis=1)
38-
39-
# msa + parent state
40-
# msa_map has mapping from msa to state, going by the state with the largest
41-
# population (since a msa may span multiple states)
42-
msa_map = map_df[["hrrnum", "state_id", "population"]]
43-
msa_map = msa_map.groupby(["hrrnum"]).max().reset_index()
44-
data = data.merge(msa_map, how="left", on="hrrnum").drop(
33+
fips_to_state = gmpr._load_crosswalk(from_code="fips", to_code="state")
34+
if geo_res == "county":
35+
mix_map = fips_to_state[["fips", "state_id"]]
36+
else:
37+
fips_to_geo_res = gmpr._load_crosswalk(from_code="fips", to_code=geo_res)
38+
mix_map = fips_to_geo_res[["fips", geo_res]].merge(
39+
fips_to_state[["fips", "state_id"]],
40+
on="fips",
41+
how="inner")
42+
mix_map = gmpr.add_population_column(mix_map, "fips").groupby(
43+
geo_res).max().reset_index().drop(
44+
["fips", "population"], axis = 1)
45+
# Merge the info of parent state to the data
46+
data = data.merge(mix_map, how="left", on=geo_key).drop(
4547
columns=["population"]).dropna()
46-
data = data.groupby(["timestamp", "hrrnum", "state_id"]).sum().reset_index()
47-
data["hrrnum"] = data["hrrnum"].astype(int)
48-
49-
return data, "hrrnum"
50-
51-
def zip_to_county(data, map_df):
52-
"""Aggregate zip codes to the county resolution, along with its parent state.
53-
Args:
54-
data: dataframe aggregated to the day-zip resolution
55-
Returns:
56-
dataframe at the day-county resolution and parent state, with their string keys
57-
"""
58-
# zip -> county + parent state (county has unique state)
59-
zip_map = map_df[["fips", "zip", "state_id"]].dropna().drop_duplicates()
60-
data = data.merge(zip_map, how="left", on="zip").drop(columns=["zip"]).dropna()
61-
data = data.groupby(["timestamp", "fips", "state_id"]).sum().reset_index()
62-
data["fips"] = data["fips"].apply(lambda x: str(int(x)).zfill(5))
63-
64-
return data, "fips"
65-
66-
def zip_to_state(data, map_df):
67-
"""Aggregate zip codes to the state resolution.
68-
Args:
69-
data: dataframe aggregated to the day-zip resolution
70-
Returns:
71-
dataframe at the day-state resolution, with the state key
72-
"""
73-
zip_map = map_df[["zip", "state_id"]].dropna().drop_duplicates()
74-
data = data.merge(zip_map, how="left", on="zip").drop(
75-
columns=["zip"]).dropna()
76-
data = data.groupby(["timestamp", "state_id"]).sum().reset_index()
48+
data = data.groupby(["timestamp", geo_key, "state_id"]).sum().reset_index()
7749
return data

quidel_covidtest/delphi_quidel_covidtest/run.py

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,9 @@
44
This module should contain a function called `run_module`, that is executed
55
when the module is run with `python -m MODULE_NAME`.
66
"""
7-
from os.path import join
8-
9-
import pandas as pd
107
from delphi_utils import read_params, add_prefix
118

12-
from .geo_maps import (zip_to_msa, zip_to_hrr, zip_to_county, zip_to_state)
9+
from .geo_maps import geo_map
1310
from .pull import (pull_quidel_covidtest,
1411
check_export_start_date,
1512
check_export_end_date,
@@ -20,21 +17,16 @@
2017
from .constants import (END_FROM_TODAY_MINUS, EXPORT_DAY_RANGE,
2118
SMOOTHED_POSITIVE, RAW_POSITIVE,
2219
SMOOTHED_TEST_PER_DEVICE, RAW_TEST_PER_DEVICE,
23-
GEO_RESOLUTIONS, SENSORS, SMOOTHERS,
24-
COUNTY, MSA)
20+
GEO_RESOLUTIONS, SENSORS, SMOOTHERS)
2521

2622

2723
def run_module():
2824
"""Run the quidel_covidtest indicator."""
2925
params = read_params()
3026
cache_dir = params["cache_dir"]
3127
export_dir = params["export_dir"]
32-
static_file_dir = params["static_file_dir"]
3328
export_start_date = params["export_start_date"]
3429
export_end_date = params["export_end_date"]
35-
map_df = pd.read_csv(
36-
join(static_file_dir, "fips_prop_pop.csv"), dtype={"fips": int}
37-
)
3830

3931
# Pull data and update export date
4032
df, _end_date = pull_quidel_covidtest(params)
@@ -50,7 +42,7 @@ def run_module():
5042

5143
# State Level
5244
data = df.copy()
53-
state_groups = zip_to_state(data, map_df).groupby("state_id")
45+
state_groups = geo_map("state", data).groupby("state_id")
5446

5547
# Add prefix, if required
5648
sensors = add_prefix(SENSORS,
@@ -76,21 +68,15 @@ def run_module():
7668
export_csv(state_df, "state", sensor, receiving_dir=export_dir,
7769
start_date=export_start_date, end_date=export_end_date)
7870

79-
# County/HRR/MSA level
80-
for geo_res in GEO_RESOLUTIONS:
71+
# County/HRR/MSA level
72+
for geo_res in GEO_RESOLUTIONS:
73+
geo_data, res_key = geo_map(geo_res, data)
74+
for sensor in sensors:
8175
print(geo_res, sensor)
82-
data = df.copy()
83-
if geo_res == COUNTY:
84-
data, res_key = zip_to_county(data, map_df)
85-
elif geo_res == MSA:
86-
data, res_key = zip_to_msa(data, map_df)
87-
else:
88-
data, res_key = zip_to_hrr(data, map_df)
89-
9076
res_df = generate_sensor_for_other_geores(
91-
state_groups, data, res_key, smooth=smoothers[sensor][1],
92-
device=smoothers[sensor][0], first_date=first_date,
93-
last_date=last_date)
77+
state_groups, geo_data, res_key, smooth=smoothers[sensor][1],
78+
device=smoothers[sensor][0], first_date=first_date,
79+
last_date=last_date)
9480
export_csv(res_df, geo_res, sensor, receiving_dir=export_dir,
9581
start_date=export_start_date, end_date=export_end_date)
9682

quidel_covidtest/tests/test_geo_maps.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,9 @@
66
import numpy as np
77

88

9-
from delphi_quidel_covidtest.geo_maps import (zip_to_msa, zip_to_hrr,
10-
zip_to_county, zip_to_state)
9+
from delphi_quidel_covidtest.geo_maps import geo_map
1110

1211

13-
map_df = pd.read_csv(
14-
join("../static", "fips_prop_pop.csv"), dtype={"fips": int}
15-
)
16-
1712
class TestGeoMap:
1813
def test_county(self):
1914

@@ -24,10 +19,11 @@ def test_county(self):
2419
"2020-06-15", "2020-06-15", "2020-06-15"],
2520
"totalTest": [100, 50, 200, 200, 250, 500],
2621
"positiveTest": [10, 8, 15, 5, 20, 50],
22+
"numUniqueDevices": [2, 1, 1, 1, 1, 1]
2723
}
2824
)
2925

30-
new_df, res_key = zip_to_county(df, map_df)
26+
new_df, res_key = geo_map("county", df)
3127

3228
assert res_key == 'fips'
3329
assert set(new_df["fips"].values) == set(['25027', '53011', '48439'])
@@ -44,10 +40,11 @@ def test_state(self):
4440
"2020-06-15", "2020-06-15", "2020-06-15"],
4541
"totalTest": [100, 50, 200, 200, 250, 500],
4642
"positiveTest": [10, 8, 15, 5, 20, 50],
43+
"numUniqueDevices": [2, 1, 1, 1, 1, 1]
4744
}
4845
)
4946

50-
new_df = zip_to_state(df, map_df)
47+
new_df = geo_map("state", df)
5148

5249
assert set(new_df["state_id"].values) == set(['ma', 'tx', 'wa'])
5350
assert set(new_df["timestamp"].values) == set(df["timestamp"].values)
@@ -63,12 +60,13 @@ def test_hrr(self):
6360
"2020-06-15", "2020-06-15", "2020-06-15"],
6461
"totalTest": [100, 50, 200, 200, 250, 500],
6562
"positiveTest": [10, 8, 15, 5, 20, 50],
63+
"numUniqueDevices": [2, 1, 1, 1, 1, 1]
6664
}
6765
)
6866

69-
new_df, res_key = zip_to_hrr(df, map_df)
67+
new_df, res_key = geo_map("hrr", df)
7068

71-
assert set(new_df["hrrnum"].values) == set([16, 231, 340, 344, 394])
69+
assert set(new_df["hrr"].values) == set(["16", "231", "340", "344", "394"])
7270
assert set(new_df["timestamp"].values) == set(df["timestamp"].values)
7371
assert set(new_df["totalTest"].values) == set([500, 100, 250, 50, 400])
7472
assert set(new_df["positiveTest"].values) == set([50, 10, 20, 8, 20])
@@ -77,18 +75,19 @@ def test_msa(self):
7775

7876
df = pd.DataFrame(
7977
{
80-
"zip": [1607, 73716, 73719, 76010, 74435, 74936],
78+
"zip": [1607, 73716, 73719, 76010, 74945, 74936],
8179
"timestamp": ["2020-06-15", "2020-06-15", "2020-06-15",
8280
"2020-06-15", "2020-06-15", "2020-06-15"],
8381
"totalTest": [100, 50, 200, 200, 250, 500],
8482
"positiveTest": [10, 8, 15, 5, 20, 50],
83+
"numUniqueDevices": [2, 1, 1, 1, 1, 1]
8584
}
8685
)
8786

88-
new_df, res_key = zip_to_msa(df, map_df)
87+
new_df, res_key = geo_map("msa", df)
8988

90-
assert res_key == 'cbsa_id'
91-
assert set(new_df["cbsa_id"].values) == set(['19100', '22900', '49340'])
89+
assert res_key == 'msa'
90+
assert set(new_df["msa"].values) == set(['19100', '22900', '49340'])
9291
assert set(new_df["timestamp"].values) == set(df["timestamp"].values)
9392
assert set(new_df["totalTest"].values) == set([200, 750, 100])
9493
assert set(new_df["positiveTest"].values) == set([5, 70, 10])

0 commit comments

Comments
 (0)