Skip to content

cdc_covidnet : standardizing signal names #207

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Aug 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cdc_covidnet/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ should be raised and they should be manually checked (or better, fixed).

Unit tests are also included in the module. To execute these, run the following
command from this directory:

(Note: the following command requires Python 3.8; with any earlier version, some
test cases might fail. Please install Python 3.8 before running.)
```
(cd tests && ../env/bin/pytest --cov=delphi_cdc_covidnet --cov-report=term-missing)
```
Expand Down
5 changes: 5 additions & 0 deletions cdc_covidnet/delphi_cdc_covidnet/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""
Registry for signal names
"""
# Base name of the COVID-NET signal (before any "wip_" prefix is applied).
COVIDNET = "covidnet"
# Every signal name in the registry; currently only COVIDNET.
SIGNALS = [COVIDNET]
62 changes: 60 additions & 2 deletions cdc_covidnet/delphi_cdc_covidnet/update_sensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@
import numpy as np
import pandas as pd

from delphi_utils import read_params
import covidcast
from .api_config import APIConfig
from .covidnet import CovidNet
from .geo_maps import GeoMaps
from .constants import SIGNALS

def write_to_csv(data: pd.DataFrame, out_name: str, output_path: str):
"""
Expand Down Expand Up @@ -93,7 +96,62 @@ def update_sensor(
hosp_df["sample_size"] = np.nan

# Write results
out_name = "wip_covidnet"
write_to_csv(hosp_df, out_name, output_path)
signals = add_prefix(SIGNALS, wip_signal=read_params()["wip_signal"], prefix="wip_")
for signal in signals:
write_to_csv(hosp_df, signal, output_path)

return hosp_df


def add_prefix(signal_names, wip_signal, prefix):
    """Add a prefix to the names of signals that are work-in-progress (WIP).

    Parameters
    ----------
    signal_names : List[str]
        Names of signals to be exported.
    wip_signal : List[str] or bool or str
        Selects which signals receive the prefix:

        - True: every name in `signal_names` is prefixed;
        - a list of names: only the names found in that list are prefixed
          (an empty list therefore prefixes nothing);
        - False or "": only the names for which `public_signal` returns
          False are prefixed.
    prefix : str
        Prefix for new/non-public signals, e.g. "wip_".

    Returns
    -------
    List[str]
        The signal names, in the same order as `signal_names`, with the
        prefix applied where selected.

    Raises
    ------
    ValueError
        If `wip_signal` is none of the supported values.
    """
    if wip_signal is True:
        return [prefix + signal for signal in signal_names]
    if isinstance(wip_signal, list):
        make_wip = set(wip_signal)
        return [
            (prefix if signal in make_wip else "") + signal
            for signal in signal_names
        ]
    # Compare against a tuple rather than a set: set membership hashes its
    # operand, so an unhashable value (e.g. a dict from a malformed params
    # file) would raise TypeError instead of the ValueError below.
    if wip_signal in (False, ""):
        return [
            signal if public_signal(signal) else prefix + signal
            for signal in signal_names
        ]
    raise ValueError("Supply True | False or '' or [] | list()")


def public_signal(signal_):
    """Check whether a signal name is already public using COVIDcast.

    Parameters
    ----------
    signal_ : str
        Name of the signal.

    Returns
    -------
    bool
        True if `signal_` appears in the "signal" column of the COVIDcast
        metadata, False otherwise.
    """
    epidata_df = covidcast.metadata()
    # Compare the whole column at once: unlike `column[i]` lookups this does
    # not depend on the frame having row labels 0..n-1, and it avoids a
    # Python-level loop over every metadata row.
    return bool((epidata_df["signal"] == signal_).any())
3 changes: 2 additions & 1 deletion cdc_covidnet/params.json.template
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
"cache_dir": "./cache",
"start_date": "2020-03-07",
"end_date": "",
"parallel": false
"parallel": false,
"wip_signal": ""
}
3 changes: 2 additions & 1 deletion cdc_covidnet/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"pytest-cov",
"pylint",
"delphi-utils",
"requests"
"requests",
"covidcast"
]

setup(
Expand Down
3 changes: 2 additions & 1 deletion cdc_covidnet/tests/params.json.template
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
"cache_dir": "./cache",
"start_date": "2020-03-07",
"end_date": "",
"parallel": true
"parallel": true,
"wip_signal": ""
}
33,100 changes: 33,100 additions & 0 deletions cdc_covidnet/tests/static/02_20_uszips.csv

Large diffs are not rendered by default.

24 changes: 24 additions & 0 deletions cdc_covidnet/tests/test_handle_wip_signal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import unittest
from delphi_cdc_covidnet.update_sensor import add_prefix
from delphi_cdc_covidnet.constants import *

def test_handle_wip_signal():
    """Exercise add_prefix with each supported kind of `wip_signal` value."""
    # wip_signal = True: every signal is expected to carry the prefix.
    all_wip = add_prefix(SIGNALS, True, prefix="wip_")
    assert all(name.startswith("wip_") for name in all_wip)

    # wip_signal = list: only the listed signal (the first one) is prefixed.
    listed_wip = add_prefix(SIGNALS, [SIGNALS[0]], prefix="wip_")
    assert listed_wip[0].startswith("wip_")
    assert not any(name.startswith("wip_") for name in listed_wip[1:])

    # wip_signal = False: signals that are not yet published are prefixed.
    # Both the made-up name and the registry signal are expected to come
    # back with the prefix.
    unpublished_wip = add_prefix(["xyzzy", SIGNALS[0]], False, prefix="wip_")
    assert unpublished_wip[0].startswith("wip_")
    assert all(name.startswith("wip_") for name in unpublished_wip[1:])


class MyTestCase(unittest.TestCase):
    """Placeholder unittest case; it defines no test methods."""


# When this file is executed directly as a script, hand control to the
# unittest command-line runner.
if __name__ == '__main__':
    unittest.main()