diff --git a/_delphi_utils_python/Makefile b/_delphi_utils_python/Makefile index 240142c9d..6db4b759d 100644 --- a/_delphi_utils_python/Makefile +++ b/_delphi_utils_python/Makefile @@ -12,7 +12,8 @@ install: venv lint: . env/bin/activate; \ - pylint $(dir) + pylint $(dir); \ + pydocstyle $(dir) test: . env/bin/activate ;\ diff --git a/_delphi_utils_python/delphi_utils/__init__.py b/_delphi_utils_python/delphi_utils/__init__.py index c2b5ef0d1..c796035b8 100644 --- a/_delphi_utils_python/delphi_utils/__init__.py +++ b/_delphi_utils_python/delphi_utils/__init__.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -"""Common Utility Functions to Support DELPHI Indicators -""" +"""Common Utility Functions to Support DELPHI Indicators.""" from __future__ import absolute_import diff --git a/_delphi_utils_python/delphi_utils/archive.py b/_delphi_utils_python/delphi_utils/archive.py index 42f2ab4f3..a707fc4a1 100644 --- a/_delphi_utils_python/delphi_utils/archive.py +++ b/_delphi_utils_python/delphi_utils/archive.py @@ -1,5 +1,6 @@ """ Utilities for diffing and archiving covidcast export CSVs. + Aims to simplify the creation of issues for new and backfilled value for indicators. Also handles archiving of export CSVs to some backend (git, S3 etc.) before replacing them. @@ -52,6 +53,7 @@ def diff_export_csv( ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: """ Find differences in exported covidcast CSVs, using geo_id as the index. + Treats NA == NA as True. Parameters @@ -68,7 +70,6 @@ def diff_export_csv( changed_df is the pd.DataFrame of common rows from after_csv with changed values. added_df is the pd.DataFrame of added rows from after_csv. """ - export_csv_dtypes = {"geo_id": str, "val": float, "se": float, "sample_size": float} @@ -99,7 +100,7 @@ def run_module(archive_type: str, cache_dir: str, export_dir: str, **kwargs): - """Builds and runs an ArchiveDiffer. + """Build and run an ArchiveDiffer. Parameters ---------- @@ -132,13 +133,11 @@ def run_module(archive_type: str, class ArchiveDiffer: - """ - Base class for performing diffing and archiving of exported covidcast CSVs - """ + """Base class for performing diffing and archiving of exported covidcast CSVs.""" def __init__(self, cache_dir: str, export_dir: str): """ - Initialize an ArchiveDiffer + Initialize an ArchiveDiffer. Parameters ---------- @@ -157,7 +156,8 @@ def __init__(self, cache_dir: str, export_dir: str): def update_cache(self): """ - For making sure cache_dir is updated correctly from a backend. + Make sure cache_dir is updated correctly from a backend. + To be implemented by specific archiving backends. Should set self._cache_updated = True after verifying cache is updated. """ @@ -165,7 +165,8 @@ def update_cache(self): def diff_exports(self) -> Tuple[Files, FileDiffMap, Files]: """ - Finds diffs across and within CSV files, from cache_dir to export_dir. + Find diffs across and within CSV files, from cache_dir to export_dir. + Should be called after update_cache() succeeds. Only works on *.csv files, ignores every other file. @@ -223,7 +224,8 @@ def diff_exports(self) -> Tuple[Files, FileDiffMap, Files]: def archive_exports(self, exported_files: Files) -> Tuple[Files, Files]: """ - Handles actual archiving of files, depending on specific backend. + Handle actual archiving of files, depending on specific backend. + To be implemented by specific archiving backends. Parameters @@ -241,6 +243,8 @@ def archive_exports(self, exported_files: Files) -> Tuple[Files, Files]: def filter_exports(self, common_diffs: FileDiffMap): """ + Filter export directory to only contain relevant files. + Filters down the export_dir to only contain: 1) New files, 2) Changed files, filtered-down to the ADDED and CHANGED rows only. Should be called after archive_exports() so we archive the raw exports before @@ -269,7 +273,7 @@ def filter_exports(self, common_diffs: FileDiffMap): replace(diff_file, exported_file) def run(self): - """Runs the differ and archives the changed and new files.""" + """Run the differ and archive the changed and new files.""" self.update_cache() # Diff exports, and make incremental versions @@ -293,7 +297,8 @@ def run(self): class S3ArchiveDiffer(ArchiveDiffer): """ - AWS S3 backend for archving + AWS S3 backend for archiving. + Archives CSV files into a S3 bucket, with keys "{indicator_prefix}/{csv_file_name}". Ideally, versioning should be enabled in this bucket to track versions of each CSV file. """ @@ -306,6 +311,7 @@ def __init__( ): """ Initialize a S3ArchiveDiffer. + See this link for possible aws_credentials kwargs: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html#boto3.session.Session @@ -330,9 +336,7 @@ def __init__( self.indicator_prefix = indicator_prefix def update_cache(self): - """ - For making sure cache_dir is updated with all latest files from the S3 bucket. - """ + """Make sure cache_dir is updated with all latest files from the S3 bucket.""" # List all indicator-related objects from S3 archive_objects = self.bucket.objects.filter( Prefix=self.indicator_prefix).all() @@ -358,7 +362,7 @@ def archive_exports(self, # pylint: disable=arguments-differ update_s3: bool = True ) -> Tuple[Files, Files]: """ - Handles actual archiving of files to the S3 bucket. + Handle actual archiving of files to the S3 bucket. Parameters ---------- @@ -398,7 +402,8 @@ def archive_exports(self, # pylint: disable=arguments-differ class GitArchiveDiffer(ArchiveDiffer): """ - Local git repo backend for archiving + Local git repo backend for archiving. + Archives CSV files into a local git repo as commits. Assumes that a git repository is already set up. """ @@ -446,7 +451,8 @@ def __init__( def get_branch(self, branch_name: Optional[str] = None) -> Head: """ - Retrieves a Head object representing a branch of specified name. + Retrieve a Head object representing a branch of specified name. + Creates the branch from the current active branch if does not exist yet. Parameters @@ -469,6 +475,8 @@ def get_branch(self, branch_name: Optional[str] = None) -> Head: @contextmanager def archiving_branch(self): """ + Context manager for checking out a branch. + Useful for checking out self.branch within a context, then switching back to original branch when finished. """ @@ -482,8 +490,9 @@ def archiving_branch(self): def update_cache(self): """ + Check if cache_dir is clean: has everything nicely committed if override_dirty=False. + Since we are using a local git repo, assumes there is nothing to update from. - Checks if cache_dir is clean: has everything nice committed if override_dirty=False """ # Make sure cache directory is clean: has everything nicely committed if not self.override_dirty: @@ -495,14 +504,16 @@ def update_cache(self): def diff_exports(self) -> Tuple[Files, FileDiffMap, Files]: """ - Same as base class diff_exports, but in context of specified branch + Find diffs across and within CSV files, from cache_dir to export_dir. + + Same as base class diff_exports, but in context of specified branch. """ with self.archiving_branch(): return super().diff_exports() def archive_exports(self, exported_files: Files) -> Tuple[Files, Files]: """ - Handles actual archiving of files to the local git repo. + Handle actual archiving of files to the local git repo. Parameters ---------- diff --git a/_delphi_utils_python/delphi_utils/geomap.py b/_delphi_utils_python/delphi_utils/geomap.py index 55e70630c..d61a2a823 100644 --- a/_delphi_utils_python/delphi_utils/geomap.py +++ b/_delphi_utils_python/delphi_utils/geomap.py @@ -91,8 +91,9 @@ class GeoMapper: # pylint: disable=too-many-public-methods """ def __init__(self): - """Initialize geomapper. Holds loading the crosswalk tables - until a conversion function is first used. + """Initialize geomapper. + + Holds loading the crosswalk tables until a conversion function is first used. Parameters --------- @@ -110,7 +111,7 @@ def __init__(self): # Utility functions def _load_crosswalk(self, from_code, to_code): - """Loads the crosswalk from from_code -> to_code.""" + """Load the crosswalk from from_code -> to_code.""" stream = pkg_resources.resource_stream( __name__, self.crosswalk_filepaths[from_code][to_code] ) @@ -189,7 +190,7 @@ def _load_crosswalk(self, from_code, to_code): @staticmethod def convert_fips_to_mega(data, fips_col="fips", mega_col="megafips"): - """convert fips string to a megafips string""" + """Convert fips string to a megafips string.""" data = data.copy() data[mega_col] = data[fips_col].astype(str).str.zfill(5) data[mega_col] = data[mega_col].str.slice_replace(start=2, stop=5, repl="000") @@ -205,7 +206,7 @@ def megacounty_creation( date_col="date", mega_col="megafips", ): - """create megacounty column + """Create megacounty column. Parameters --------- @@ -412,8 +413,9 @@ def replace_geocode( def add_population_column(self, data, geocode_type, geocode_col=None, dropna=True): """ - Appends a population column to a dataframe, based on the FIPS or ZIP code. If no - dataframe is provided, the full crosswalk from geocode to population is returned. + Append a population column to a dataframe, based on the FIPS or ZIP code. + + If no dataframe is provided, the full crosswalk from geocode to population is returned. Parameters --------- @@ -464,7 +466,7 @@ def fips_to_megacounty( mega_col="megafips", count_cols=None, ): - """Convert and aggregate from FIPS to megaFIPS + """Convert and aggregate from FIPS to megaFIPS. Parameters --------- diff --git a/_delphi_utils_python/delphi_utils/signal.py b/_delphi_utils_python/delphi_utils/signal.py index 51a3fe74c..298c87bc3 100644 --- a/_delphi_utils_python/delphi_utils/signal.py +++ b/_delphi_utils_python/delphi_utils/signal.py @@ -2,7 +2,8 @@ import covidcast def add_prefix(signal_names, wip_signal, prefix="wip_"): - """Adds prefix to signal if there is a WIP signal + """Add prefix to signal if there is a WIP signal. + Parameters ---------- signal_names: List[str] @@ -18,7 +19,6 @@ def add_prefix(signal_names, wip_signal, prefix="wip_"): List of signal names wip/non wip signals for further computation """ - if wip_signal is True: return [prefix + signal for signal in signal_names] if isinstance(wip_signal, list): @@ -37,7 +37,8 @@ def add_prefix(signal_names, wip_signal, prefix="wip_"): def public_signal(signal): - """Checks if the signal name is already public using COVIDcast + """Check if the signal name is already public using COVIDcast. + Parameters ---------- signal : str diff --git a/_delphi_utils_python/delphi_utils/utils.py b/_delphi_utils_python/delphi_utils/utils.py index 8de61aa37..9baa4f85b 100644 --- a/_delphi_utils_python/delphi_utils/utils.py +++ b/_delphi_utils_python/delphi_utils/utils.py @@ -5,7 +5,7 @@ from shutil import copyfile def read_params(): - """Reads a file named 'params.json' in the current working directory. + """Read a file named 'params.json' in the current working directory. If the file does not exist, it copies the file 'params.json.template' to 'param.json' and then reads the file.