Skip to content

Get utils to pass pydocstyle #570

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 2, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion _delphi_utils_python/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ install: venv

lint:
. env/bin/activate; \
pylint $(dir)
pylint $(dir); \
pydocstyle $(dir)

test:
. env/bin/activate ;\
Expand Down
3 changes: 1 addition & 2 deletions _delphi_utils_python/delphi_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
"""Common Utility Functions to Support DELPHI Indicators
"""
"""Common Utility Functions to Support DELPHI Indicators."""

from __future__ import absolute_import

Expand Down
51 changes: 31 additions & 20 deletions _delphi_utils_python/delphi_utils/archive.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
Utilities for diffing and archiving covidcast export CSVs.

Aims to simplify the creation of issues for new and backfilled value for indicators.
Also handles archiving of export CSVs to some backend (git, S3 etc.) before replacing them.

Expand Down Expand Up @@ -52,6 +53,7 @@ def diff_export_csv(
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
"""
Find differences in exported covidcast CSVs, using geo_id as the index.

Treats NA == NA as True.

Parameters
Expand All @@ -68,7 +70,6 @@ def diff_export_csv(
changed_df is the pd.DataFrame of common rows from after_csv with changed values.
added_df is the pd.DataFrame of added rows from after_csv.
"""

export_csv_dtypes = {"geo_id": str, "val": float,
"se": float, "sample_size": float}

Expand Down Expand Up @@ -99,7 +100,7 @@ def run_module(archive_type: str,
cache_dir: str,
export_dir: str,
**kwargs):
"""Builds and runs an ArchiveDiffer.
"""Build and run an ArchiveDiffer.

Parameters
----------
Expand Down Expand Up @@ -132,13 +133,11 @@ def run_module(archive_type: str,


class ArchiveDiffer:
"""
Base class for performing diffing and archiving of exported covidcast CSVs
"""
"""Base class for performing diffing and archiving of exported covidcast CSVs."""

def __init__(self, cache_dir: str, export_dir: str):
"""
Initialize an ArchiveDiffer
Initialize an ArchiveDiffer.

Parameters
----------
Expand All @@ -157,15 +156,17 @@ def __init__(self, cache_dir: str, export_dir: str):

def update_cache(self):
"""
For making sure cache_dir is updated correctly from a backend.
Make sure cache_dir is updated correctly from a backend.

To be implemented by specific archiving backends.
Should set self._cache_updated = True after verifying cache is updated.
"""
raise NotImplementedError

def diff_exports(self) -> Tuple[Files, FileDiffMap, Files]:
"""
Finds diffs across and within CSV files, from cache_dir to export_dir.
Find diffs across and within CSV files, from cache_dir to export_dir.

Should be called after update_cache() succeeds. Only works on *.csv files,
ignores every other file.

Expand Down Expand Up @@ -223,7 +224,8 @@ def diff_exports(self) -> Tuple[Files, FileDiffMap, Files]:

def archive_exports(self, exported_files: Files) -> Tuple[Files, Files]:
"""
Handles actual archiving of files, depending on specific backend.
Handle actual archiving of files, depending on specific backend.

To be implemented by specific archiving backends.

Parameters
Expand All @@ -241,6 +243,8 @@ def archive_exports(self, exported_files: Files) -> Tuple[Files, Files]:

def filter_exports(self, common_diffs: FileDiffMap):
"""
Filter export directory to only contain relevant files.

Filters down the export_dir to only contain:
1) New files, 2) Changed files, filtered-down to the ADDED and CHANGED rows only.
Should be called after archive_exports() so we archive the raw exports before
Expand Down Expand Up @@ -269,7 +273,7 @@ def filter_exports(self, common_diffs: FileDiffMap):
replace(diff_file, exported_file)

def run(self):
"""Runs the differ and archives the changed and new files."""
"""Run the differ and archive the changed and new files."""
self.update_cache()

# Diff exports, and make incremental versions
Expand All @@ -293,7 +297,8 @@ def run(self):

class S3ArchiveDiffer(ArchiveDiffer):
"""
AWS S3 backend for archving
AWS S3 backend for archiving.

Archives CSV files into a S3 bucket, with keys "{indicator_prefix}/{csv_file_name}".
Ideally, versioning should be enabled in this bucket to track versions of each CSV file.
"""
Expand All @@ -306,6 +311,7 @@ def __init__(
):
"""
Initialize a S3ArchiveDiffer.

See this link for possible aws_credentials kwargs:
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html#boto3.session.Session

Expand All @@ -330,9 +336,7 @@ def __init__(
self.indicator_prefix = indicator_prefix

def update_cache(self):
"""
For making sure cache_dir is updated with all latest files from the S3 bucket.
"""
"""Make sure cache_dir is updated with all latest files from the S3 bucket."""
# List all indicator-related objects from S3
archive_objects = self.bucket.objects.filter(
Prefix=self.indicator_prefix).all()
Expand All @@ -358,7 +362,7 @@ def archive_exports(self, # pylint: disable=arguments-differ
update_s3: bool = True
) -> Tuple[Files, Files]:
"""
Handles actual archiving of files to the S3 bucket.
Handle actual archiving of files to the S3 bucket.

Parameters
----------
Expand Down Expand Up @@ -398,7 +402,8 @@ def archive_exports(self, # pylint: disable=arguments-differ

class GitArchiveDiffer(ArchiveDiffer):
"""
Local git repo backend for archiving
Local git repo backend for archiving.

Archives CSV files into a local git repo as commits.
Assumes that a git repository is already set up.
"""
Expand Down Expand Up @@ -446,7 +451,8 @@ def __init__(

def get_branch(self, branch_name: Optional[str] = None) -> Head:
"""
Retrieves a Head object representing a branch of specified name.
Retrieve a Head object representing a branch of specified name.

Creates the branch from the current active branch if it does not exist yet.

Parameters
Expand All @@ -469,6 +475,8 @@ def get_branch(self, branch_name: Optional[str] = None) -> Head:
@contextmanager
def archiving_branch(self):
"""
Check out a branch within a context.

Useful for checking out self.branch within a context, then switching back
to original branch when finished.
"""
Expand All @@ -482,8 +490,9 @@ def archiving_branch(self):

def update_cache(self):
"""
Check if cache_dir is clean: has everything nice committed if override_dirty=False.

Since we are using a local git repo, assumes there is nothing to update from.
Checks if cache_dir is clean: has everything nice committed if override_dirty=False
"""
# Make sure cache directory is clean: has everything nicely committed
if not self.override_dirty:
Expand All @@ -495,14 +504,16 @@ def update_cache(self):

def diff_exports(self) -> Tuple[Files, FileDiffMap, Files]:
"""
Same as base class diff_exports, but in context of specified branch
Find diffs across and within CSV files, from cache_dir to export_dir.

Same as base class diff_exports, but in context of specified branch.
"""
with self.archiving_branch():
return super().diff_exports()

def archive_exports(self, exported_files: Files) -> Tuple[Files, Files]:
"""
Handles actual archiving of files to the local git repo.
Handle actual archiving of files to the local git repo.

Parameters
----------
Expand Down
18 changes: 10 additions & 8 deletions _delphi_utils_python/delphi_utils/geomap.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,9 @@ class GeoMapper: # pylint: disable=too-many-public-methods
"""

def __init__(self):
"""Initialize geomapper. Holds loading the crosswalk tables
until a conversion function is first used.
"""Initialize geomapper.

Holds loading the crosswalk tables until a conversion function is first used.

Parameters
---------
Expand All @@ -110,7 +111,7 @@ def __init__(self):

# Utility functions
def _load_crosswalk(self, from_code, to_code):
"""Loads the crosswalk from from_code -> to_code."""
"""Load the crosswalk from from_code -> to_code."""
stream = pkg_resources.resource_stream(
__name__, self.crosswalk_filepaths[from_code][to_code]
)
Expand Down Expand Up @@ -189,7 +190,7 @@ def _load_crosswalk(self, from_code, to_code):

@staticmethod
def convert_fips_to_mega(data, fips_col="fips", mega_col="megafips"):
"""convert fips string to a megafips string"""
"""Convert fips string to a megafips string."""
data = data.copy()
data[mega_col] = data[fips_col].astype(str).str.zfill(5)
data[mega_col] = data[mega_col].str.slice_replace(start=2, stop=5, repl="000")
Expand All @@ -205,7 +206,7 @@ def megacounty_creation(
date_col="date",
mega_col="megafips",
):
"""create megacounty column
"""Create megacounty column.

Parameters
---------
Expand Down Expand Up @@ -412,8 +413,9 @@ def replace_geocode(

def add_population_column(self, data, geocode_type, geocode_col=None, dropna=True):
"""
Appends a population column to a dataframe, based on the FIPS or ZIP code. If no
dataframe is provided, the full crosswalk from geocode to population is returned.
Append a population column to a dataframe, based on the FIPS or ZIP code.

If no dataframe is provided, the full crosswalk from geocode to population is returned.

Parameters
---------
Expand Down Expand Up @@ -464,7 +466,7 @@ def fips_to_megacounty(
mega_col="megafips",
count_cols=None,
):
"""Convert and aggregate from FIPS to megaFIPS
"""Convert and aggregate from FIPS to megaFIPS.

Parameters
---------
Expand Down
7 changes: 4 additions & 3 deletions _delphi_utils_python/delphi_utils/signal.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
import covidcast

def add_prefix(signal_names, wip_signal, prefix="wip_"):
"""Adds prefix to signal if there is a WIP signal
"""Add prefix to signal if there is a WIP signal.

Parameters
----------
signal_names: List[str]
Expand All @@ -18,7 +19,6 @@ def add_prefix(signal_names, wip_signal, prefix="wip_"):
List of signal names
wip/non wip signals for further computation
"""

if wip_signal is True:
return [prefix + signal for signal in signal_names]
if isinstance(wip_signal, list):
Expand All @@ -37,7 +37,8 @@ def add_prefix(signal_names, wip_signal, prefix="wip_"):


def public_signal(signal):
"""Checks if the signal name is already public using COVIDcast
"""Check if the signal name is already public using COVIDcast.

Parameters
----------
signal : str
Expand Down
2 changes: 1 addition & 1 deletion _delphi_utils_python/delphi_utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from shutil import copyfile

def read_params():
"""Reads a file named 'params.json' in the current working directory.
"""Read a file named 'params.json' in the current working directory.

If the file does not exist, it copies the file 'params.json.template' to
'params.json' and then reads the file.
Expand Down