Skip to content

Commit aba2c85

Browse files
committed
lint compliance for usafacts
1 parent dcaf97d commit aba2c85

File tree

4 files changed

+23
-20
lines changed

4 files changed

+23
-20
lines changed

usafacts/delphi_usafacts/geo.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# -*- coding: utf-8 -*-
2+
"""Functions for converting geocodes."""
23
import pandas as pd
34

45

@@ -78,6 +79,11 @@
7879

7980
FIPS_TO_STATE = {v: k.lower() for k, v in STATE_TO_FIPS.items()}
8081

82+
# Valid geographical resolutions output by this indicator.
83+
VALID_GEO_RES = ("county", "state", "msa", "hrr")
84+
# Sensors that report proportions. For geo resolutions with unallocated cases
85+
# or deaths, we avoid reporting these sensors.
86+
PROP_SENSORS = ("incidence", "cumulative_prop")
8187

8288
def fips_to_state(fips: str) -> str:
8389
"""Wrapper that handles exceptions to the FIPS scheme in the USAFacts data.
@@ -121,9 +127,9 @@ def disburse(df: pd.DataFrame, pooled_fips: str, fips_list: list):
121127
pd.DataFrame
122128
Dataframe with same schema as df, with the counts disbursed.
123129
"""
124-
COLS = ["new_counts", "cumulative_counts"]
130+
cols = ["new_counts", "cumulative_counts"]
125131
df = df.copy().sort_values(["fips", "timestamp"])
126-
for col in COLS:
132+
for col in cols:
127133
# Get values from the aggregated county:
128134
vals = df.loc[df["fips"] == pooled_fips, col].values / len(fips_list)
129135
for fips in fips_list:
@@ -155,9 +161,6 @@ def geo_map(df: pd.DataFrame, geo_res: str, map_df: pd.DataFrame, sensor: str):
155161
pd.DataFrame
156162
Columns: geo_id, timestamp, ...
157163
"""
158-
VALID_GEO_RES = ("county", "state", "msa", "hrr")
159-
#It is not clear how to calculate the proportion for unallocated cases/deaths
160-
PROP_SENSORS = ("incidence", "cumulative_prop")
161164
if geo_res not in VALID_GEO_RES:
162165
raise ValueError(f"geo_res must be one of {VALID_GEO_RES}")
163166

usafacts/delphi_usafacts/pull.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,16 @@
11
# -*- coding: utf-8 -*-
2+
"""Functions for pulling data from the USAFacts website."""
23
import numpy as np
34
import pandas as pd
45

6+
# Columns to drop from the data frame.
7+
DROP_COLUMNS = [
8+
"FIPS",
9+
"County Name",
10+
"State",
11+
"stateFIPS"
12+
]
13+
514

615
def pull_usafacts_data(base_url: str, metric: str, pop_df: pd.DataFrame) -> pd.DataFrame:
716
"""Pulls the latest USA Facts data, and conforms it into a dataset
@@ -43,24 +52,15 @@ def pull_usafacts_data(base_url: str, metric: str, pop_df: pd.DataFrame) -> pd.D
4352
pd.DataFrame
4453
Dataframe as described above.
4554
"""
46-
# Constants
47-
DROP_COLUMNS = [
48-
"FIPS",
49-
"County Name",
50-
"State",
51-
"stateFIPS"
52-
]
53-
# MIN_FIPS = 1000
54-
# MAX_FIPS = 57000
5555

5656
# Read data
5757
df = pd.read_csv(base_url.format(metric=metric)).rename({"countyFIPS":"FIPS"}, axis=1)
5858
# Check missing FIPS
5959
null_mask = pd.isnull(df["FIPS"])
6060
assert null_mask.sum() == 0
6161

62-
UNEXPECTED_COLUMNS = [x for x in df.columns if "Unnamed" in x]
63-
DROP_COLUMNS.extend(UNEXPECTED_COLUMNS)
62+
unexpected_columns = [x for x in df.columns if "Unnamed" in x]
63+
unexpected_columns.extend(DROP_COLUMNS)
6464

6565
# Assign Grand Princess Cruise Ship a special FIPS 90000
6666
# df.loc[df["FIPS"] == 6000, "FIPS"] = 90000
@@ -90,7 +90,7 @@ def pull_usafacts_data(base_url: str, metric: str, pop_df: pd.DataFrame) -> pd.D
9090
"Tried to drop non-existent columns. The dataset "
9191
"schema may have changed. Please investigate and "
9292
"amend DROP_COLUMNS."
93-
)
93+
) from e
9494
# Check that columns are either FIPS or dates
9595
try:
9696
columns = list(df.columns)
@@ -99,13 +99,12 @@ def pull_usafacts_data(base_url: str, metric: str, pop_df: pd.DataFrame) -> pd.D
9999
# Detects whether there is a non-date string column -- not perfect
100100
_ = [int(x.replace("/", "")) for x in columns]
101101
except ValueError as e:
102-
print(e)
103102
raise ValueError(
104103
"Detected unexpected column(s) "
105104
"after dropping DROP_COLUMNS. The dataset "
106105
"schema may have changed. Please investigate and "
107106
"amend DROP_COLUMNS."
108-
)
107+
) from e
109108
# Reshape dataframe
110109
df = df.melt(
111110
id_vars=["fips", "population"],

usafacts/delphi_usafacts/run.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@
6767

6868

6969
def run_module():
70-
70+
"""Run the usafacts indicator."""
7171
params = read_params()
7272
export_start_date = params["export_start_date"]
7373
if export_start_date == "latest":

usafacts/delphi_usafacts/smooth.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
'''Functions for smoothing signals.'''
12
# -*- coding: utf-8 -*-
23
import numpy as np
34

0 commit comments

Comments
 (0)