diff --git a/.gitignore b/.gitignore index 09a07c464..385af7747 100644 --- a/.gitignore +++ b/.gitignore @@ -118,3 +118,8 @@ venv.bak/ # mypy .mypy_cache/ + +# Ansible +.retry +.indicators-ansible-vault-pass +indicators-ansible-vault-pass diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 000000000..220ab2b34 --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,83 @@ +#!groovy + +// import shared library: https://github.com/cmu-delphi/jenkins-shared-library +@Library('jenkins-shared-library') _ + +pipeline { + + agent any + + stages { + + stage ("Environment") { + when { + anyOf { + branch "deploy-*"; + changeRequest target: "deploy-*", comparator: "GLOB" + } + } + steps { + script { + // Get the indicator name from the pipeline env. + if ( env.CHANGE_TARGET ) { + INDICATOR = env.CHANGE_TARGET.replaceAll("deploy-", "") + } + else if ( env.BRANCH_NAME ) { + INDICATOR = env.BRANCH_NAME.replaceAll("deploy-", "") + } + else { + INDICATOR = "" + } + } + } + } + + stage('Build') { + when { + changeRequest target: "deploy-*", comparator: "GLOB" + } + steps { + sh "jenkins/${INDICATOR}-jenkins-build.sh" + } + } + + stage('Test') { + when { + changeRequest target: "deploy-*", comparator: "GLOB" + } + steps { + sh "jenkins/${INDICATOR}-jenkins-test.sh" + } + } + + stage('Package') { + when { + changeRequest target: "deploy-*", comparator: "GLOB" + } + steps { + sh "jenkins/${INDICATOR}-jenkins-package.sh" + } + } + + stage('Deploy') { + when { + branch "deploy-*" + } + steps { + sh "jenkins/${INDICATOR}-jenkins-deploy.sh" + } + } + } + + post { + always { + script { + /* + Use slackNotifier.groovy from shared library and provide current + build result as parameter. + */ + slackNotifier(currentBuild.currentResult) + } + } + } +} diff --git a/README.md b/README.md new file mode 100644 index 000000000..2d33da170 --- /dev/null +++ b/README.md @@ -0,0 +1,83 @@ +# Covidcast Indicators + +Pipeline code and supporting libraries for the **Real-time COVID-19 Indicators** used in the Delphi Group's [**COVIDcast** map](https://covidcast.cmu.edu). + +## The indicators + +Each subdirectory contained here that is named after an indicator has specific documentation. Please review as necessary! + +## General workflow for indicators creation and deployment + +**tl;dr** + +- Create your new indicator branch from `main`. +- Build it using the appropriate template, following the guidelines in the included README.md and REVIEW.md files. +- Make some stuff! +- When your stuff works, push your `dev-*` branch to remote for review. +- Consult with a platform engineer for the remaining production setup needs. They will create a branch called `deploy-*` for your indicator. +- Initiate a pull request against this new branch. +- If your peers like it and Jenkins approves, deploy your changes by merging the PR. +- Rejoice! + +### Starting out + +The `main` branch should contain up-to-date code and supporting libraries. This should be your starting point when creating a new indicator. + +```shell +# Hint +# +git checkout main +git checkout -b dev-my-feature-branch +``` + +### Creating your indicator + +Create a directory for your new indicator by making a copy of `_template_r` or `_template_python` depending on the programming language you intend to use. The template copies of `README.md` and `REVIEW.md` include the minimum requirements for code structure, documentation, linting, testing, and method of configuration. 
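+
+For example, to start from the Python template (the directory name
+`my_indicator` below is just a placeholder — use your indicator's name):
+
+```shell
+# Hint
+#
+cp -r _template_python my_indicator
+```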
Beyond that, we don't have any established restrictions on implementation; you can look at other existing indicators to see some examples of code layout, organization, and general approach.
+
+- Consult your peers with questions! :handshake:
+
+Once you have something that runs locally and passes tests, you can set up your remote branch for eventual review and production deployment.
+
+```shell
+# Hint
+#
+git push -u origin dev-my-feature-branch
+```
+
+### Setting up for review and deployment
+
+Once you have your branch set up, you should get in touch with a platform engineer to pair up on the remaining production needs. These include:
+
+- Creating the corresponding `deploy-*` branch in the repo.
+- Adding the necessary Jenkins scripts for your indicator.
+- Preparing the runtime host with any automation configuration necessities.
+- Reviewing the workflow to make sure it meets the general guidelines and will run as expected on the runtime host.
+
+Once all the last-mile configuration is in place, you can create a pull request against the correct `deploy-*` branch to initiate the CI/CD pipeline, which will build, test, and package your indicator for deployment.
+
+If everything looks ok, platform engineering has validated the last mile, and the pull request is accepted, you can merge the PR. Deployment will start automatically.
+
+Hopefully it'll be a full-on :tada:, after that :crossed_fingers:
+
+If not, circle back and try again.
+
+## Production overview
+
+### Running production code
+
+Currently, the production indicators all live and run on the venerable and perennially useful Delphi primary server (also known generically as "the runtime host").
+
+- This is a virtual machine running RHEL 7.5 and living in CMU's Campus Cloud vSphere-based infrastructure environment.
+
+### Delivering an indicator to the production environment
+
+We use a branch-based git workflow coupled with [Jenkins](https://www.jenkins.io/) and [Ansible](https://www.ansible.com/) to build, test, package, and deploy each indicator individually to the runtime host.
+
+- Jenkins dutifully manages the whole process for us by executing several "stages" in the context of a [CI/CD pipeline](https://dzone.com/articles/learn-how-to-setup-a-cicd-pipeline-from-scratch). Each stage does something unique, building on the previous stage. The stages are:
+  - Environment - Sets up some environment-specific needs that the other stages depend on.
+  - Build - Create the Python venv on the Jenkins host.
+  - Test - Run linting and unit tests.
+  - Package - Tar and gzip the built environment.
+  - Deploy - Trigger an Ansible playbook to place the built package onto the runtime host, place any necessary production configuration, and adjust the runtime environment (if necessary).
+
+There are several additional Jenkins-specific files that will need to be created for each indicator, as well as some configuration additions to the runtime host. It will be important to pair with a platform engineer to prepare the necessary production environment needs, test the workflow, validate on production, and ultimately sign off on a production release.
diff --git a/ansible/ansible-deploy.yaml b/ansible/ansible-deploy.yaml
new file mode 100644
index 000000000..87b017be8
--- /dev/null
+++ b/ansible/ansible-deploy.yaml
@@ -0,0 +1,26 @@
+---
+- hosts: runtime_host
+  vars_files:
+    - vars.yaml
+  tasks:
+    - name: Copy and unarchive the package into the indicators runtime host directory.
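+      # A note on what happens here (variable values come from vars.yaml):
+      # the Package stage left "{{ jenkins_artifact_dir }}/{{ package }}" on
+      # the Jenkins host, and unarchive copies that tarball to the runtime
+      # host and extracts it in one step.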
+      unarchive:
+        src: "{{ jenkins_artifact_dir }}/{{ package }}"
+        dest: "{{ indicators_runtime_dir }}"
+        owner: "{{ runtime_user }}"
+        group: "{{ runtime_user }}"
+
+    - name: Symlink the venv Python bin path to the runtime host's pyenv Python.
+      file:
+        src: "{{ pyenv_python_path }}"
+        dest: "{{ indicators_runtime_dir }}/{{ indicator }}/env/bin/python"
+        owner: "{{ runtime_user }}"
+        group: "{{ runtime_user }}"
+        state: link
+
+    - name: Set production params file.
+      copy:
+        src: files/{{ indicator }}-params-prod.json
+        dest: "{{ indicators_runtime_dir }}/{{ indicator }}/params.json"
+        owner: "{{ runtime_user }}"
+        group: "{{ runtime_user }}"
diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg
new file mode 100644
index 000000000..0850ef830
--- /dev/null
+++ b/ansible/ansible.cfg
@@ -0,0 +1,8 @@
+[defaults]
+remote_user = indicators
+vault_password_file = ~/.indicators-ansible-vault-pass
+ansible_managed = This file is managed by Ansible.%n
+  Template: {file}
+  Date: %Y-%m-%d %H:%M:%S
+  User: {uid}
+  Host: {host}
diff --git a/ansible/files/jhu-params-prod.json b/ansible/files/jhu-params-prod.json
new file mode 100644
index 000000000..daf7d7097
--- /dev/null
+++ b/ansible/files/jhu-params-prod.json
@@ -0,0 +1,7 @@
+{
+  "export_start_date": "2020-02-20",
+  "static_file_dir": "./static",
+  "export_dir": "/common/covidcast/receiving/jhu-csse/",
+  "cache_dir": "./cache",
+  "base_url": "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_{metric}_US.csv"
+}
diff --git a/ansible/inventory b/ansible/inventory
new file mode 100644
index 000000000..5727aad2d
--- /dev/null
+++ b/ansible/inventory
@@ -0,0 +1,2 @@
+[runtime_host]
+delphi-master-prod-01.delphi.cmu.edu
\ No newline at end of file
diff --git a/ansible/vars.yaml b/ansible/vars.yaml
new file mode 100644
index 000000000..f57b2e387
--- /dev/null
+++ b/ansible/vars.yaml
@@ -0,0 +1,7 @@
+---
+runtime_user: "indicators"
+jenkins_artifact_dir: "/var/lib/jenkins/artifacts"
+indicators_runtime_dir: "/home/{{ runtime_user }}/runtime"
+package: "{{ indicator }}.tar.gz" # `indicator` is passed in at the Ansible invocation.
+python_version: "3.8.2"
+pyenv_python_path: "/home/{{ runtime_user }}/.pyenv/versions/{{ python_version }}/bin/python"
diff --git a/jenkins/jhu-jenkins-build.sh b/jenkins/jhu-jenkins-build.sh
new file mode 100755
index 000000000..2b9808cfb
--- /dev/null
+++ b/jenkins/jhu-jenkins-build.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+#
+# JHU: Jenkins build
+#
+
+set -exo pipefail
+source ~/.bash_profile
+
+#
+# Build
+#
+
+local_indicator="jhu"
+
+cd "${WORKSPACE}/${local_indicator}" || exit
+
+# Set up venv
+python -m venv env
+source env/bin/activate
+pip install ../_delphi_utils_python/.
+pip install .
diff --git a/jenkins/jhu-jenkins-deploy.sh b/jenkins/jhu-jenkins-deploy.sh
new file mode 100755
index 000000000..680349348
--- /dev/null
+++ b/jenkins/jhu-jenkins-deploy.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+#
+# JHU: Jenkins deploy
+#
+
+set -exo pipefail
+source ~/.bash_profile
+
+#
+# Deploy
+#
+
+local_indicator="jhu"
+
+cd "${WORKSPACE}/ansible" || exit
+
+# Ansible!
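+# Run the deploy playbook against the inventory host, passing the indicator
+# name so the playbook can locate the right artifact and params file.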
+ansible-playbook ansible-deploy.yaml --extra-vars "indicator=${local_indicator}" -i inventory
diff --git a/jenkins/jhu-jenkins-package.sh b/jenkins/jhu-jenkins-package.sh
new file mode 100755
index 000000000..47d000c11
--- /dev/null
+++ b/jenkins/jhu-jenkins-package.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+#
+# JHU: Jenkins package
+#
+
+set -exo pipefail
+source ~/.bash_profile
+
+#
+# Package
+#
+
+local_indicator="jhu"
+
+cd "${WORKSPACE}" || exit
+
+# Create .tar.gz for deployment
+tar -czvf "${JENKINS_HOME}/artifacts/${local_indicator}.tar.gz" "${local_indicator}"
diff --git a/jenkins/jhu-jenkins-test.sh b/jenkins/jhu-jenkins-test.sh
new file mode 100755
index 000000000..680a3aa94
--- /dev/null
+++ b/jenkins/jhu-jenkins-test.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+#
+# JHU: Jenkins test
+#
+
+set -exo pipefail
+source ~/.bash_profile
+
+#
+# Test
+#
+
+local_indicator="jhu"
+
+cd "${WORKSPACE}/${local_indicator}" || exit
+
+# Linter
+env/bin/pylint delphi_"${local_indicator}"
+
+# Unit tests and code coverage
+cd tests || exit && \
+  ../env/bin/pytest --cov=delphi_"${local_indicator}" --cov-report=term-missing
diff --git a/jhu/DETAILS.md b/jhu/DETAILS.md
index 7850a2343..125372f38 100644
--- a/jhu/DETAILS.md
+++ b/jhu/DETAILS.md
@@ -57,7 +57,10 @@ New York City comprises of five boroughs:
 **Data from all five boroughs are reported under New York County,
 FIPS Code 36061.**  The other four boroughs are included in the dataset
-and show up in our API, but they should be uniformly zero.
+and show up in our API, but they should be uniformly zero. (In our population
+file under the static folder, the population of all five boroughs is also
+assigned to FIPS Code 36061 only. The population for the rest of these
+counties is set to 1.)
 
 All NYC counts are mapped to the MSA with CBSA ID 35620, which encompasses
 all five boroughs.  All NYC counts are mapped to HRR 303, which intersects
diff --git a/jhu/README.md b/jhu/README.md
index 72e43f7b7..f50712de0 100644
--- a/jhu/README.md
+++ b/jhu/README.md
@@ -58,3 +58,8 @@ The output will show the number of unit tests that passed and failed, along
 with the percentage of code covered by the tests. None of the tests should
 fail and the code lines that are not covered by unit tests should be small
 and should not include critical sub-routines.
+
+- Jenkins test #1
+- Jenkins test #2
+- Jenkins test #3
+- Jenkins test #4
diff --git a/jhu/delphi_jhu/geo.py b/jhu/delphi_jhu/geo.py
index 4e305917b..c471a4ae3 100644
--- a/jhu/delphi_jhu/geo.py
+++ b/jhu/delphi_jhu/geo.py
@@ -89,6 +89,10 @@ FIPS_TO_STATE = {v: k.lower() for k, v in STATE_TO_FIPS.items()}
 
+# Map the fake FIPS codes JHU uses for unassigned cases/deaths to
+# state-level "mega-county" FIPS codes (state FIPS followed by 000).
+JHU_FAKE_FIPS_TO_MEGA_FIPS = {f'900{x}' : f'{x}000' for x in STATE_TO_FIPS.values()}
+
 def fips_to_state(fips: str) -> str:
     """Wrapper that handles exceptions to the FIPS scheme in the JHU data.
@@ -148,7 +152,7 @@ def disburse(df: pd.DataFrame, pooled_fips: str, fips_list: list):
     return df
 
-def geo_map(df: pd.DataFrame, geo_res: str, map_df: pd.DataFrame):
+def geo_map(df: pd.DataFrame, geo_res: str, map_df: pd.DataFrame, sensor: str):
     """
     Maps a DataFrame df, which contains data at the county resolution, and
     aggregate it to the geographic resolution geo_res.
@@ -162,6 +166,10 @@
         ('county', 'state', 'msa', 'hrr').
     map_df: pd.DataFrame
         Loaded from static file "fips_prop_pop.csv".
+    sensor: str
+        sensor type. Valid options:
+        ("new_counts", "cumulative_counts",
+        "incidence", "cumulative_prop")
 
     Returns
     -------
     pd.DataFrame
         Columns: geo_id, timestamp, ...
     """
     VALID_GEO_RES = ("county", "state", "msa", "hrr")
+    # It is not clear how to calculate the proportion for unassigned
+    # cases/deaths, so the proportion sensors skip the mega-county rows.
+    PROP_SENSORS = ("incidence", "cumulative_prop")
     if geo_res not in VALID_GEO_RES:
         raise ValueError(f"geo_res must be one of {VALID_GEO_RES}")
-    df = df.copy()
+
+    df_mega = df[df['fips'].astype(int) >= 90001].copy()
+    df_mega['geo_id'] = df_mega['fips'].apply(lambda x: JHU_FAKE_FIPS_TO_MEGA_FIPS[x])
+
+    df = df[df['fips'].astype(int) < 90001].copy()
+
     if geo_res == "county":
         df["geo_id"] = df["fips"]
+        if sensor not in PROP_SENSORS:
+            df = df.append(df_mega)
     elif geo_res == "state":
         # Grab first two digits of fips
         # Map state fips to us postal code
-        df["geo_id"] = df["fips"].apply(fips_to_state)
+        df["geo_id"] = df["fips"]
+        # Add unassigned cases/deaths
+        df = df.append(df_mega)
+        df["geo_id"] = df["geo_id"].apply(fips_to_state)
     elif geo_res in ("msa", "hrr"):
         # Disburse Dukes & Nantucket to individual counties
         df = disburse(df, DN_FIPS, DN_COUNTY_FIPS)
@@ -200,8 +220,13 @@
         merged["new_counts"] = merged["new_counts"] * merged["pop_prop"]
         merged["population"] = merged["population"] * merged["pop_prop"]
         df = merged.drop(["zip", "pop_prop", "hrrnum", "cbsa_id"], axis=1)
+        # if sensor not in PROP_SENSORS:
+        #     df_mega["geo_id"] = df_mega["geo_id"].apply(fips_to_state)
+        #     df = df.append(df_mega)
     df = df.drop("fips", axis=1)
     df = df.groupby(["geo_id", "timestamp"]).sum().reset_index()
+
+    # Proportion values are not meaningful for megacounties (their population
+    # is unknown), and they are not considered in the main function.
     df["incidence"] = df["new_counts"] / df["population"] * INCIDENCE_BASE
     df["cumulative_prop"] = df["cumulative_counts"] / df["population"] * INCIDENCE_BASE
     return df
diff --git a/jhu/delphi_jhu/pull.py b/jhu/delphi_jhu/pull.py
index 1049d5e0a..d4131db82 100644
--- a/jhu/delphi_jhu/pull.py
+++ b/jhu/delphi_jhu/pull.py
@@ -62,7 +62,7 @@ def pull_jhu_data(base_url: str, metric: str, pop_df: pd.DataFrame) -> pd.DataFr
     MIN_FIPS = 1000
     MAX_FIPS = 57000
     EXTRA_FIPS = (
-        72, # Puerto Rico (provided as the entire state)
+        72,     # Puerto Rico (provided as the entire state)
         70002,  # Kansas City, MO
         70003,  # Dukes and Nantucket Counties, MA
     )
@@ -79,9 +79,13 @@
         & (df["FIPS"] < MAX_FIPS)
     )  # "Uncategorized", etc.
| df["FIPS"].isin(EXTRA_FIPS) + # Get Fake FIPS for unassigned cases + | np.logical_and(df['FIPS'] >= 90001, + df['FIPS'] <= 90056) ] # Merge in population LOWERCASE, consistent across confirmed and deaths - df = pd.merge(df, pop_df, on="FIPS") + # Set population as NAN for fake fips + df = pd.merge(df, pop_df, on="FIPS", how='left') # Manual correction for PR df.loc[df["FIPS"] == 72, "FIPS"] = 72000 diff --git a/jhu/delphi_jhu/run.py b/jhu/delphi_jhu/run.py index 5dc4437d9..6758b0d4f 100644 --- a/jhu/delphi_jhu/run.py +++ b/jhu/delphi_jhu/run.py @@ -77,7 +77,7 @@ def run_module(): print(geo_res, metric, sensor, smoother) df = dfs[metric] # Aggregate to appropriate geographic resolution - df = geo_map(df, geo_res, map_df) + df = geo_map(df, geo_res, map_df, sensor) df["val"] = SMOOTHERS_MAP[smoother][0](df[sensor].values) df["se"] = np.nan df["sample_size"] = np.nan diff --git a/jhu/tests/receiving/.gitignore b/jhu/tests/receiving/.gitignore index e69de29bb..552154e09 100644 --- a/jhu/tests/receiving/.gitignore +++ b/jhu/tests/receiving/.gitignore @@ -0,0 +1,120 @@ +# You should hard commit a prototype for this file, but we +# want to avoid accidental adding of API tokens and other +# private data parameters +params.json + +# Do not commit output files +receiving/*.csv + +# Remove macOS files +.DS_Store + +# virtual environment +dview/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +coverage.xml +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +.static_storage/ +.media/ +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ diff --git a/jhu/tests/test_geo.py b/jhu/tests/test_geo.py index 8d45eb336..646adb24a 100644 --- a/jhu/tests/test_geo.py +++ b/jhu/tests/test_geo.py @@ -25,6 +25,13 @@ def test_normal(self): assert fips_to_state("12003") == "fl" assert fips_to_state("50103") == "vt" assert fips_to_state("15003") == "hi" + + def test_mega(self): + + assert fips_to_state("01000") == "al" + assert fips_to_state("13000") == "ga" + assert fips_to_state("44000") == "ri" + assert fips_to_state("12000") == "fl" class TestDisburse: @@ -60,7 +67,7 @@ def test_incorrect_geo(self): ) with pytest.raises(ValueError): - geo_map(df, "département", MAP_DF) + geo_map(df, "département", MAP_DF, 'new_counts') def test_county(self): @@ -74,15 +81,27 @@ def test_county(self): } ) - new_df = geo_map(df, "county", MAP_DF) + df_mega = pd.DataFrame( + { + "fips": ["90013", "90001"], + "timestamp": ["2020-02-15", "2020-02-15"], + "new_counts": [8, 2], + "cumulative_counts": [80, 12], + "population": [np.nan, np.nan], + } + ) + + df = df.append(df_mega) + + new_df = geo_map(df, "county", MAP_DF, 'new_counts') exp_incidence = df["new_counts"] / df["population"] * 100000 exp_cprop = df["cumulative_counts"] / df["population"] * 100000 - - assert set(new_df["geo_id"].values) == set(df["fips"].values) + + assert set(new_df["geo_id"].values) == set(['01000', '13000', '48027', '50103', '53003']) assert set(new_df["timestamp"].values) == set(df["timestamp"].values) - assert set(new_df["incidence"].values) == set(exp_incidence.values) - assert set(new_df["cumulative_prop"].values) == set(exp_cprop.values) + assert set(new_df["incidence"].values) - set(exp_incidence.values) == set([np.Inf]) + assert set(new_df["cumulative_prop"].values) - set(exp_cprop.values) == set([np.Inf]) def test_state(self): @@ -95,19 +114,31 @@ def test_state(self): "population": [100, 2100, 300, 25], } ) + + df_mega = pd.DataFrame( + { + "fips": ["90013", "90001", "04000", "25000"], + "timestamp": ["2020-02-15", "2020-02-15", "2020-02-15", "2020-02-15"], + "new_counts": [8, 2, 5, 10], + "cumulative_counts": [80, 12, 30, 100], + "population": [np.nan, np.nan, np.nan, np.nan], + } + ) + + df = df.append(df_mega) - new_df = geo_map(df, "state", MAP_DF) + new_df = geo_map(df, "state", MAP_DF, 'new_counts') - exp_incidence = np.array([27, 13]) / np.array([2500, 25]) * 100000 - exp_cprop = np.array([165, 60]) / np.array([2500, 25]) * 100000 + exp_incidence = np.array([27 + 5, 13 + 10]) / np.array([2500, 25]) * 100000 + exp_cprop = np.array([165 + 30, 60 + 100]) / np.array([2500, 25]) * 100000 - assert (new_df["geo_id"].values == ["az", "ma"]).all() - assert (new_df["timestamp"].values == ["2020-02-15", "2020-02-15"]).all() - 
assert (new_df["new_counts"].values == [27, 13]).all() - assert (new_df["cumulative_counts"].values == [165, 60]).all() - assert (new_df["population"].values == [2500, 25]).all() - assert (new_df["incidence"].values == exp_incidence).all() - assert (new_df["cumulative_prop"].values == exp_cprop).all() + assert set(new_df["geo_id"].values) == set(["az", "ma", "al", "ga"]) + assert set(new_df["timestamp"].values) == set(["2020-02-15"]) + assert set(new_df["new_counts"].values) == set([32, 23, 2, 8]) + assert set(new_df["cumulative_counts"].values) == set([195, 160, 12, 80]) + assert set(new_df["population"].values) == set([2500, 25, 0]) + assert set(new_df["incidence"].values) - set(exp_incidence) == set([np.Inf]) + assert set(new_df["cumulative_prop"].values) - set(exp_cprop) == set([np.Inf]) def test_hrr(self): @@ -121,7 +152,19 @@ def test_hrr(self): } ) - new_df = geo_map(df, "hrr", MAP_DF) + # df_mega = pd.DataFrame( + # { + # "fips": ["90013", "90001"], + # "timestamp": ["2020-02-15", "2020-02-15"], + # "new_counts": [8, 2], + # "cumulative_counts": [80, 12], + # "population": [np.nan, np.nan], + # } + # ) + + # df = df.append(df_mega) + + new_df = geo_map(df, "hrr", MAP_DF, 'new_counts') exp_incidence = np.array([13, 27]) / np.array([25, 2500]) * 100000 exp_cprop = np.array([60, 165]) / np.array([25, 2500]) * 100000 @@ -145,8 +188,20 @@ def test_msa(self): "population": [100, 2100, 300, 25], } ) - - new_df = geo_map(df, "msa", MAP_DF) + + # df_mega = pd.DataFrame( + # { + # "fips": ["90013", "90001"], + # "timestamp": ["2020-02-15", "2020-02-15"], + # "new_counts": [8, 2], + # "cumulative_counts": [80, 12], + # "population": [np.nan, np.nan], + # } + # ) + + # df = df.append(df_mega) + + new_df = geo_map(df, "msa", MAP_DF, 'new_counts') exp_incidence = np.array([2, 13]) / np.array([300, 25]) * 100000 exp_cprop = np.array([45, 60]) / np.array([300, 25]) * 100000 diff --git a/jhu/tests/test_run.py b/jhu/tests/test_run.py index 246cf7e66..60d3e13b1 100644 --- a/jhu/tests/test_run.py +++ b/jhu/tests/test_run.py @@ -30,8 +30,8 @@ def test_output_files_exist(self, run_as_module): "confirmed_cumulative_num", "confirmed_incidence_num", "confirmed_incidence_prop", - "wip_deaths_cumulative_prop", - "wip_confirmed_cumulative_prop", + "deaths_7dav_cumulative_prop", + "confirmed_7dav_cumulative_prop", ] expected_files = []