Skip to content

Doctor_visits patching code #1977

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions doctor_visits/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,9 @@ The output will show the number of unit tests that passed and failed, along
with the percentage of code covered by the tests. None of the tests should
fail and the code lines that are not covered by unit tests should be small and
should not include critical sub-routines.

## Running Patches:
To generate data for a specific range of issue dates, written out in batch issue format, add a `patch` section to `params.json` as described in `patch.py`, then run:
```
env/bin/python -m delphi_doctor_visits.patch
```
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,13 @@ def change_date_format(name):
name = '_'.join(split_name)
return name

def download(ftp_credentials, out_path, logger):
def download(ftp_credentials, out_path, logger, issue=None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a general thought: can we introduce using typing for the function parameters? I checked that typing was introduced as a part of the standard library at 3.5 so should be able to use it for this repo. As a newcomer it would be helpful to know the type.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@aysim319 can you turn this request into an issue?

"""Pull the latest raw files."""
current_time = datetime.datetime.now()
if issue is None:
current_time = datetime.datetime.now()
else:
current_time = datetime.datetime.strptime(issue, "%Y-%m-%d").replace(hour=23, minute=59, second=59)

logger.info("starting download", time=current_time)
seconds_in_day = 24 * 60 * 60

Expand Down
5 changes: 3 additions & 2 deletions doctor_visits/delphi_doctor_visits/get_latest_claims_name.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import datetime
from pathlib import Path

def get_latest_filename(dir_path, logger):
def get_latest_filename(dir_path, logger, patch=False):
"""Get the latest filename from the list of downloaded raw files."""
current_date = datetime.datetime.now()
files = list(Path(dir_path).glob("*"))
Expand All @@ -24,7 +24,8 @@ def get_latest_filename(dir_path, logger):
latest_timestamp = timestamp
latest_filename = file

assert current_date.date() == latest_timestamp.date(), "no drop for today"
if not patch:
assert current_date.date() == latest_timestamp.date(), "no drop for today"

logger.info("Latest claims file", filename=latest_filename)

Expand Down
48 changes: 48 additions & 0 deletions doctor_visits/delphi_doctor_visits/patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""
This module is used for patching data in the delphi_doctor_visits package.

To use this module, add a `patch` section to params.json with the keys `start_issue` and `end_issue` (YYYY-MM-DD issue dates, both inclusive) and `patch_dir` (output directory).

It will generate data for that range of issue dates, and store them in batch issue format:
[name-of-patch]/issue_[issue-date]/nssp/actual_data_file.csv
"""

from datetime import datetime, timedelta
from os import makedirs

from delphi_utils import get_structured_logger, read_params

from .run import run_module

def patch():
    """Run the doctor visits indicator once per issue date in a configured range.

    Parameters are loaded with `read_params()`. The "patch" section must contain:
        - "start_issue": str, YYYY-MM-DD format, first issue date (inclusive)
        - "end_issue": str, YYYY-MM-DD format, last issue date (inclusive)
        - "patch_dir": str, directory to write all issues output

    For each issue date, params["patch"]["current_issue"] is set to that date
    (YYYY-MM-DD) and params["common"]["export_dir"] is pointed at
    <patch_dir>/issue_<YYYYMMDD>/nssp before `run_module(params)` is called.

    Raises
    ------
    ValueError
        If "start_issue" is later than "end_issue", or if either date does not
        match the YYYY-MM-DD format (raised by `datetime.strptime`).
    """
    params = read_params()
    # .get() mirrors run_module, which also treats "log_filename" as optional.
    logger = get_structured_logger(__name__, filename=params["common"].get("log_filename"))

    patch_dir = params["patch"]["patch_dir"]
    start_issue = datetime.strptime(params["patch"]["start_issue"], "%Y-%m-%d")
    end_issue = datetime.strptime(params["patch"]["end_issue"], "%Y-%m-%d")

    # An inverted range would otherwise create patch_dir and silently do nothing.
    if start_issue > end_issue:
        raise ValueError(
            f"""start_issue {start_issue.strftime("%Y-%m-%d")} is after """
            f"""end_issue {end_issue.strftime("%Y-%m-%d")}"""
        )

    logger.info(f"""Start patching {patch_dir}""")
    logger.info(f"""Start issue: {start_issue.strftime("%Y-%m-%d")}""")
    logger.info(f"""End issue: {end_issue.strftime("%Y-%m-%d")}""")

    makedirs(patch_dir, exist_ok=True)

    current_issue = start_issue
    while current_issue <= end_issue:
        logger.info(f"""Running issue {current_issue.strftime("%Y-%m-%d")}""")

        # run_module reads this key to decide which issue date to pull.
        params["patch"]["current_issue"] = current_issue.strftime("%Y-%m-%d")

        # NOTE(review): "nssp" looks copied from another indicator's patch
        # script; confirm the intended source directory name for doctor visits.
        current_issue_yyyymmdd = current_issue.strftime("%Y%m%d")
        current_issue_dir = f"""{patch_dir}/issue_{current_issue_yyyymmdd}/nssp"""
        makedirs(current_issue_dir, exist_ok=True)
        params["common"]["export_dir"] = current_issue_dir

        run_module(params)
        current_issue += timedelta(days=1)


if __name__ == "__main__":
    patch()
13 changes: 10 additions & 3 deletions doctor_visits/delphi_doctor_visits/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,18 +42,25 @@ def run_module(params): # pylint: disable=too-many-statements
- "se": bool, whether to write out standard errors
- "obfuscated_prefix": str, prefix for signal name if write_se is True.
- "parallel": bool, whether to update sensor in parallel.
- "patch": Only used for patching data, remove if not patching.
Check out patch.py and README for more details on how to run patches.
- "start_date": str, YYYY-MM-DD format, first issue date
- "end_date": str, YYYY-MM-DD format, last issue date
- "patch_dir": str, directory to write all issues output
"""
start_time = time.time()
logger = get_structured_logger(
__name__, filename=params["common"].get("log_filename"),
log_exceptions=params["common"].get("log_exceptions", True))

issue = params.get("patch", {}).get("current_issue", None)
patch = issue is not None

# pull latest data
download(params["indicator"]["ftp_credentials"],
params["indicator"]["input_dir"], logger)
download(params["indicator"]["ftp_credentials"], params["indicator"]["input_dir"], logger, issue=issue)

# find the latest files (these have timestamps)
claims_file = get_latest_filename(params["indicator"]["input_dir"], logger)
claims_file = get_latest_filename(params["indicator"]["input_dir"], logger, patch=patch)

# modify data
modify_and_write(claims_file, logger)
Expand Down
Loading