Commit 831f554

Merge branch 'nancodes' into nans_safegraph_patterns

2 parents: 1df3553 + 801f04c

253 files changed: +17552, −600216 lines

.github/workflows/python-ci.yml

Lines changed: 1 addition & 2 deletions

@@ -16,7 +16,7 @@ jobs:
     if: github.event.pull_request.draft == false
     strategy:
       matrix:
-        packages: [_delphi_utils_python, cdc_covidnet, changehc, claims_hosp, combo_cases_and_deaths, covid_act_now, google_symptoms, hhs_hosp, hhs_facilities, jhu, nchs_mortality, nowcast, quidel, quidel_covidtest, safegraph, safegraph_patterns, usafacts]
+        packages: [_delphi_utils_python, cdc_covidnet, changehc, claims_hosp, combo_cases_and_deaths, covid_act_now, doctor_visits, google_symptoms, hhs_hosp, hhs_facilities, jhu, nchs_mortality, nowcast, quidel, quidel_covidtest, safegraph, safegraph_patterns, usafacts]
     defaults:
       run:
         working-directory: ${{ matrix.packages }}
@@ -34,7 +34,6 @@ jobs:
      run: |
        make install
    - name: Lint
-      if: ${{ matrix.packages != 'claims_hosp' }}
      run: |
        make lint
    - name: Test

_delphi_utils_python/Makefile

Lines changed: 3 additions & 5 deletions

@@ -1,7 +1,5 @@
 .PHONY = venv, lint, test, clean
 
-dir = $(shell find ./delphi_* -name __init__.py | grep -o 'delphi_[_[:alnum:]]*')
-
 venv:
 	python3.8 -m venv env
 
@@ -11,12 +9,12 @@ install: venv
 	pip install -e .
 
 lint:
-	. env/bin/activate; pylint $(dir)
-	. env/bin/activate; pydocstyle $(dir)
+	. env/bin/activate; pylint delphi_utils
+	. env/bin/activate; pydocstyle delphi_utils
 
 test:
 	. env/bin/activate ;\
-	(cd tests && ../env/bin/pytest --cov=$(dir) --cov-report=term-missing)
+	(cd tests && ../env/bin/pytest --cov=delphi_utils --cov-report=term-missing)
 
 clean:
 	rm -rf env

_delphi_utils_python/delphi_utils/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -12,6 +12,6 @@
 from .geomap import GeoMapper
 from .smooth import Smoother
 from .signal import add_prefix
-from .nancodes import NAN_CODES
+from .nancodes import Nans
 
 __version__ = "0.1.0"

_delphi_utils_python/delphi_utils/archive.py

Lines changed: 16 additions & 4 deletions

@@ -641,12 +641,24 @@ def update_cache(self):
                         help="Commit message for `archive_type` = 'git'")
     args = parser.parse_args()
     params = read_params()
+
+    # Autodetect whether parameters have been factored hierarchically or not
+    # See https://github.com/cmu-delphi/covidcast-indicators/issues/847
+    # Once all indicators have their parameters factored in to "common", "indicator", "validation",
+    # and "archive", this code will be obsolete.
+    if "archive" in params:
+        archive_params = params["archive"]
+        common_params = params["common"]
+    else:
+        archive_params = params
+        common_params = params
+
     run_module(args.archive_type,
-               params["cache_dir"],
-               params["export_dir"],
-               aws_credentials=params.get("aws_credentials", {}),
+               archive_params["cache_dir"],
+               common_params["export_dir"],
+               aws_credentials=archive_params.get("aws_credentials", {}),
                branch_name=args.branch_name,
-               bucket_name=params.get("bucket_name", ""),
+               bucket_name=archive_params.get("bucket_name", ""),
                commit_message=args.commit_message,
                commit_partial_success=args.commit_partial_success,
                indicator_prefix=args.indicator_prefix,
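
Note: for context, here is a minimal sketch of the two params.json layouts this autodetection accepts, written as Python dicts as read_params() would return them. The paths, bucket name, and credential value are hypothetical placeholders, not taken from the commit.

# Hierarchically factored layout (issue #847): archive settings live under
# an "archive" key, shared settings under "common".
params_factored = {
    "common": {"export_dir": "./receiving"},                # hypothetical path
    "archive": {
        "cache_dir": "./cache",                             # hypothetical path
        "bucket_name": "example-bucket",                    # hypothetical name
        "aws_credentials": {"aws_access_key_id": "..."},
    },
}

# Legacy flat layout: every key sits at the top level, so the same dict
# serves as both archive_params and common_params in the code above.
params_flat = {
    "export_dir": "./receiving",
    "cache_dir": "./cache",
    "bucket_name": "example-bucket",
    "aws_credentials": {"aws_access_key_id": "..."},
}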

_delphi_utils_python/delphi_utils/export.py

Lines changed: 8 additions & 8 deletions

@@ -64,16 +64,16 @@ def create_export_csv(
         else:
             export_filename = f"{date.strftime('%Y%m%d')}_{geo_res}_{metric}_{sensor}.csv"
         export_file = join(export_dir, export_filename)
-        EXPECTED_COLUMNS = [
-            "geo_id",
-            "val",
-            "se",
-            "sample_size",
-            "missing_val",
-            "missing_se",
+        expected_columns = [
+            "geo_id",
+            "val",
+            "se",
+            "sample_size",
+            "missing_val",
+            "missing_se",
             "missing_sample_size"
         ]
-        export_df = df[df["timestamp"] == date].filter(items=EXPECTED_COLUMNS)
+        export_df = df[df["timestamp"] == date].filter(items=expected_columns)
         if remove_null_samples:
             export_df = export_df[export_df["sample_size"].notnull()]
         export_df = export_df.round({"val": 7, "se": 7})
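
Note: as a usage sketch with illustrative values only, a dataframe passed to create_export_csv would now carry the three missingness columns alongside the standard ones; create_export_csv keeps only expected_columns when writing each date's CSV.

import pandas as pd
from delphi_utils import Nans

# Illustrative input frame (values are made up, not from the commit).
df = pd.DataFrame({
    "timestamp": pd.to_datetime(["2020-06-01", "2020-06-01"]),
    "geo_id": ["ca", "ny"],
    "val": [1.5, None],
    "se": [0.1, None],
    "sample_size": [100.0, None],
    "missing_val": [Nans.NOT_MISSING, Nans.DATA_INSUFFICIENT],
    "missing_se": [Nans.NOT_MISSING, Nans.DATA_INSUFFICIENT],
    "missing_sample_size": [Nans.NOT_MISSING, Nans.DATA_INSUFFICIENT],
})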

_delphi_utils_python/delphi_utils/geomap.py

Lines changed: 87 additions & 89 deletions

@@ -141,85 +141,77 @@ def _load_crosswalk(self, from_code, to_code):
         assert from_code in self.crosswalk_filepaths, \
             f"No crosswalk files for {from_code}; try {'; '.join(self.crosswalk_filepaths.keys())}"
         assert to_code in self.crosswalk_filepaths[from_code], \
-            f"No crosswalk file from {from_code} to {to_code}; try" \
+            f"No crosswalk file from {from_code} to {to_code}; try " \
             f"{'; '.join(self.crosswalk_filepaths[from_code].keys())}"
+
+        if self.crosswalks[from_code][to_code] is None:
+            self.crosswalks[from_code][to_code] = self._load_crosswalk_from_file(from_code, to_code)
+        return self.crosswalks[from_code][to_code]
+
+    def _load_crosswalk_from_file(self, from_code, to_code):
         stream = pkg_resources.resource_stream(
             __name__, self.crosswalk_filepaths[from_code][to_code]
         )
-        if self.crosswalks[from_code][to_code] is None:
-            # Weighted crosswalks
-            if (from_code, to_code) in [
-                ("zip", "fips"),
-                ("fips", "zip"),
-                ("jhu_uid", "fips"),
-                ("zip", "msa"),
-                ("fips", "hrr"),
-                ("zip", "hhs")
-            ]:
-                self.crosswalks[from_code][to_code] = pd.read_csv(
-                    stream,
-                    dtype={
-                        from_code: str,
-                        to_code: str,
-                        "weight": float,
-                    },
-                )
-            # Unweighted crosswalks
-            elif (from_code, to_code) in [
-                ("zip", "hrr"),
-                ("fips", "msa"),
-                ("fips", "hhs"),
-                ("state_code", "hhs")
-            ]:
-                self.crosswalks[from_code][to_code] = pd.read_csv(
-                    stream,
-                    dtype={from_code: str, to_code: str},
-                )
-            # Special table of state codes, state IDs, and state names
-            elif (from_code, to_code) == ("state", "state"):
-                self.crosswalks[from_code][to_code] = pd.read_csv(
-                    stream,
-                    dtype={
-                        "state_code": str,
-                        "state_id": str,
-                        "state_name": str,
-                    },
-                )
-            elif (from_code, to_code) == ("zip", "state"):
-                self.crosswalks[from_code][to_code] = pd.read_csv(
-                    stream,
-                    dtype={
-                        "zip": str,
-                        "weight": float,
-                        "state_code": str,
-                        "state_id": str,
-                        "state_name": str,
-                    },
-                )
-            elif (from_code, to_code) == ("fips", "state"):
-                self.crosswalks[from_code][to_code] = pd.read_csv(
-                    stream,
-                    dtype={
-                        "fips": str,
-                        "state_code": str,
-                        "state_id": str,
-                        "state_name": str,
-                    },
-                )
-            # Population tables
-            elif to_code == "pop":
-                self.crosswalks[from_code][to_code] = pd.read_csv(
-                    stream,
-                    dtype={
-                        from_code: str,
-                        "pop": int,
-                    },
-                    usecols=[
-                        from_code,
-                        "pop"
-                    ]
-                )
-        return self.crosswalks[from_code][to_code]
+        usecols = None
+        dtype = None
+        # Weighted crosswalks
+        if (from_code, to_code) in [
+            ("zip", "fips"),
+            ("fips", "zip"),
+            ("jhu_uid", "fips"),
+            ("zip", "msa"),
+            ("fips", "hrr"),
+            ("zip", "hhs")
+        ]:
+            dtype = {
+                from_code: str,
+                to_code: str,
+                "weight": float,
+            }
+
+        # Unweighted crosswalks
+        elif (from_code, to_code) in [
+            ("zip", "hrr"),
+            ("fips", "msa"),
+            ("fips", "hhs"),
+            ("state_code", "hhs")
+        ]:
+            dtype = {from_code: str, to_code: str}
+
+        # Special table of state codes, state IDs, and state names
+        elif (from_code, to_code) == ("state", "state"):
+            dtype = {
+                "state_code": str,
+                "state_id": str,
+                "state_name": str,
+            }
+        elif (from_code, to_code) == ("zip", "state"):
+            dtype = {
+                "zip": str,
+                "weight": float,
+                "state_code": str,
+                "state_id": str,
+                "state_name": str,
+            }
+        elif (from_code, to_code) == ("fips", "state"):
+            dtype = {
+                "fips": str,
+                "state_code": str,
+                "state_id": str,
+                "state_name": str,
+            }
+
+        # Population tables
+        elif to_code == "pop":
+            dtype = {
+                from_code: str,
+                "pop": int,
+            }
+            usecols = [
+                from_code,
+                "pop"
+            ]
+        return pd.read_csv(stream, dtype=dtype, usecols=usecols)
 
     @staticmethod
     def convert_fips_to_mega(data, fips_col="fips", mega_col="megafips"):
@@ -333,19 +325,8 @@ def add_geocode(
         else:
             df[from_col] = df[from_col].astype(str)
 
-        # Assuming that the passed-in records are all United States data, at the moment
-        if (from_code, new_code) in [("fips", "nation"),  # pylint: disable=no-else-return
-                                     ("zip", "nation"),
-                                     ("state_code", "nation"),
-                                     ("state_name", "nation"),
-                                     ("state_id", "nation")]:
-            df[new_col] = df[from_col].apply(lambda x: "us")
-            return df
-        elif new_code == "nation":
-            raise ValueError(
-                f"Conversion to the nation level is not supported "
-                f"from {from_code}; try fips, zip, or state_*"
-            )
+        if new_code == "nation":
+            return self._add_nation_geocode(df, from_code, from_col, new_col)
 
         # state codes are all stored in one table
         if from_code in state_codes and new_code in state_codes:
@@ -375,11 +356,28 @@ def add_geocode(
             df.drop(columns=state_codes, inplace=True)
         elif new_code in state_codes and from_code in state_codes:
             state_codes.remove(new_code)
-            state_codes.remove(from_code)
+            if from_code in state_codes:
+                state_codes.remove(from_code)
             df.drop(columns=state_codes, inplace=True)
 
         return df
 
+    def _add_nation_geocode(self, df, from_code, from_col, new_col):
+        """Add a nation geocode column to a dataframe.
+
+        See `add_geocode()` documentation for argument description.
+        """
+        valid_from_codes = ["fips", "zip", "state_code", "state_name", "state_id"]
+        # Assuming that the passed-in records are all United States data, at the moment
+        if from_code in valid_from_codes:
+            df[new_col] = df[from_col].apply(lambda x: "us")
+            return df
+
+        raise ValueError(
+            f"Conversion to the nation level is not supported "
+            f"from {from_code}; try {valid_from_codes}"
+        )
+
     def replace_geocode(
         self,
         df,
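
Note: a brief usage sketch of the refactored nation conversion, with made-up data values; the method and argument names match the hunks above.

import pandas as pd
from delphi_utils import GeoMapper

gm = GeoMapper()
df = pd.DataFrame({"fips": ["06037", "36061"], "val": [1.0, 2.0]})

# add_geocode() now delegates to _add_nation_geocode(), which maps every
# supported from_code ("fips", "zip", "state_code", "state_name",
# "state_id") to the single code "us".
df = gm.add_geocode(df, "fips", "nation", new_col="nation")
assert (df["nation"] == "us").all()

# An unsupported from_code such as "hrr" raises ValueError listing the
# valid codes.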
_delphi_utils_python/delphi_utils/nancodes.py

Lines changed: 12 additions & 7 deletions

@@ -1,8 +1,13 @@
+"""Provides unified not-a-number codes for the indicators."""
 
-NAN_CODES = {
-    "Not Missing": 0,
-    "Not Applicable": 1,
-    "Region Exception": 2,
-    "Data Insufficient": 3,
-    "Unknown": 4
-}
+from enum import IntEnum
+
+class Nans(IntEnum):
+    """An enum of not-a-number codes for the indicators."""
+
+    NOT_MISSING = 0
+    NOT_APPLICABLE = 1
+    REGION_EXCEPTION = 2
+    DATA_INSUFFICIENT = 3
+    PRIVACY = 4
+    UNKNOWN = 5
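
Note: since Nans is an IntEnum, its members compare and serialize as plain integers, which is what the missingness columns in the exported CSVs rely on. A quick illustrative sketch:

from delphi_utils import Nans

assert Nans.NOT_MISSING == 0        # IntEnum members equal their int values
assert int(Nans.PRIVACY) == 4
assert Nans.UNKNOWN.name == "UNKNOWN"

# e.g. tagging a censored observation (illustrative, not from the commit):
row = {"val": None, "missing_val": int(Nans.PRIVACY)}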
_delphi_utils_python/delphi_utils/runner.py

Lines changed: 35 additions & 0 deletions

@@ -0,0 +1,35 @@
+"""Indicator running utilities."""
+from typing import Any, Callable, Dict, Optional
+from .archive import ArchiveDiffer
+from .utils import read_params
+from .validator.validate import Validator
+
+Params = Dict[str, Any]
+
+# Trivial function to use as default value for validator and archive functions.
+NULL_FN = lambda x: None
+
+def run_indicator_pipeline(indicator_fn: Callable[[Params], None],
+                           validator_fn: Callable[[Params], Optional[Validator]] = NULL_FN,
+                           archiver_fn: Callable[[Params], Optional[ArchiveDiffer]] = NULL_FN):
+    """Run an indicator with its optional validation and archiving.
+
+    Arguments
+    ---------
+    indicator_fn: Callable[[Params], None]
+        function that takes a dictionary of parameters and produces indicator output
+    validator_fn: Callable[[Params], Optional[Validator]]
+        function that takes a dictionary of parameters and produces the associated Validator or
+        None if no validation should be performed.
+    archiver_fn: Callable[[Params], Optional[ArchiveDiffer]]
+        function that takes a dictionary of parameters and produces the associated ArchiveDiffer or
+        None if no archiving should be performed.
+    """
+    params = read_params()
+    indicator_fn(params)
+    validator = validator_fn(params)
+    archiver = archiver_fn(params)
+    if validator:
+        validation_report = validator.validate()
+    if archiver and (not validator or validation_report.success()):
+        archiver.archive()
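
Note: a minimal wiring sketch for the new helper. The indicator entry point and the way the Validator is constructed here are hypothetical, not taken from the commit; the import path assumes the module lands at delphi_utils/runner.py as headed above.

from delphi_utils.runner import run_indicator_pipeline
from delphi_utils.validator.validate import Validator

def run_my_indicator(params):
    # Hypothetical indicator: read its params and write CSVs to the export dir.
    ...

run_indicator_pipeline(
    run_my_indicator,
    validator_fn=lambda params: Validator(params),  # assumed constructor
    # archiver_fn left at its NULL_FN default, so no archiving happens.
)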
