Skip to content

Update quidel covidtest (Add Age Groups Signals, Add rest-of-state reports) #1467

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 31 commits into from
Feb 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
20a46d2
add age groups
Jan 11, 2022
801e2f5
update code for adding megacounties
Jan 14, 2022
15f7690
update unit tests
Jan 14, 2022
010491d
get smoothers out of the complicated loop
Jan 14, 2022
d215d6c
fix a linting
Jan 14, 2022
a120175
fix an error
Jan 14, 2022
b199cad
ignore too-many-branches in pylintrc
Jan 14, 2022
e7870e8
fix a linting error
Jan 14, 2022
07642fa
update signal names, add two super age groups
Jan 14, 2022
1ee31ae
fix a linting error
Jan 14, 2022
4eee961
remove 18-64 age group
Jan 18, 2022
a81050c
add whitespace and add comments
Jan 18, 2022
dc06d9c
add tests for ages 0-17
Jan 18, 2022
0c0f9f5
small updates for suggested changes
Jan 18, 2022
1707cfb
add state_id for megacounties
Jan 21, 2022
e1226e1
add tests for state_id
Jan 21, 2022
562773d
Add minimal censored counties test and get error?
dshemetov Jan 21, 2022
ef41f6a
add suggested changes
Jan 21, 2022
c81298b
update unit tests based on the current strategy
Jan 23, 2022
7721290
geo_id should be integers in the unit tests
Jan 23, 2022
b7f94c9
update geographical pooling
Jan 25, 2022
1fdbe49
update unit tests
Jan 25, 2022
52a4aa6
update unit tests in test_run
Jan 25, 2022
9f3f6c3
delete trailing whitespaces
Jan 25, 2022
49af726
Add a few tests to double check county censoring
dshemetov Jan 26, 2022
3218b1e
Remove faux-breakpoint, update test_data, update test_run
dshemetov Jan 26, 2022
5c6d798
fix the test in test_run
Jan 27, 2022
3e9232e
remove the question in comments
Jan 27, 2022
ab25b35
add tests for values
Jan 27, 2022
8fbff93
add archiver section to quidel params
Feb 2, 2022
963bb5a
fix params
Feb 2, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions quidel_covidtest/.pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
disable=logging-format-interpolation,
too-many-locals,
too-many-arguments,
too-many-branches,
# Allow pytest functions to be part of a class.
no-self-use,
# Allow pytest classes to have one test.
Expand Down
12 changes: 11 additions & 1 deletion quidel_covidtest/delphi_quidel_covidtest/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
MIN_OBS = 50 # minimum number of observations in order to compute a proportion.
POOL_DAYS = 7 # number of days in the past (including today) to pool over
END_FROM_TODAY_MINUS = 5 # report data until - X days
# Signal names
# Signal Types
SMOOTHED_POSITIVE = "covid_ag_smoothed_pct_positive"
RAW_POSITIVE = "covid_ag_raw_pct_positive"
SMOOTHED_TEST_PER_DEVICE = "covid_ag_smoothed_test_per_device"
Expand All @@ -22,6 +22,7 @@
HRR,
]

# state should be the last one
NONPARENT_GEO_RESOLUTIONS = [
HHS,
NATION,
Expand All @@ -39,3 +40,12 @@
# SMOOTHED_TEST_PER_DEVICE: (True, True),
# RAW_TEST_PER_DEVICE: (True, False)
}
# Age-group labels. Each label is used as a column-name suffix, joined to a
# base data column with an underscore (e.g. "totalTest_age_0_4").
# "total" is the first entry; presumably it is the all-ages aggregate -- TODO confirm.
AGE_GROUPS = [
"total",
"age_0_4",
"age_5_17",
"age_18_49",
"age_50_64",
"age_65plus",
"age_0_17",
]
20 changes: 11 additions & 9 deletions quidel_covidtest/delphi_quidel_covidtest/data_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,14 @@ def _slide_window_sum(arr, k):
sarr = np.convolve(temp, np.ones(k, dtype=int), 'valid')
return sarr


def _geographical_pooling(tpooled_tests, tpooled_ptests, min_obs):
"""
Determine how many samples from the parent geography must be borrowed.

If there are no samples available in the parent, the borrow_prop is 0. If
the parent does not have enough samples, we return a borrow_prop of 1, and
the fact that the pooled samples are insufficient are handled in the
statistic fitting step.
If there are no samples available in the parent, the borrow_prop is 0.
If the parent does not have enough samples, we return a borrow_prop of 1.
We never borrow more samples from the parent than the number of samples
we currently have.

Args:
tpooled_tests: np.ndarray[float]
Expand All @@ -93,10 +92,12 @@ def _geographical_pooling(tpooled_tests, tpooled_ptests, min_obs):
"""
if (np.any(np.isnan(tpooled_tests)) or np.any(np.isnan(tpooled_ptests))):
raise ValueError('[parent] tests should be non-negative '
'with no np.nan')
'with no np.nan')
# STEP 1: "TOP UP" USING PARENT LOCATION
# Number of observations we need to borrow to "top up"
# Can't borrow more than total no. observations.
borrow_tests = np.maximum(min_obs - tpooled_tests, 0)
borrow_tests = np.minimum(borrow_tests, tpooled_tests)
# There are many cases (a, b > 0):
# Case 1: a / b => no problem
# Case 2: a / 0 => np.inf => borrow_prop becomes 1
Expand All @@ -108,13 +109,14 @@ def _geographical_pooling(tpooled_tests, tpooled_ptests, min_obs):
with np.errstate(divide='ignore', invalid='ignore'):
borrow_prop = borrow_tests / tpooled_ptests
# If there's nothing to borrow, then ya can't borrow
borrow_prop[np.isnan(borrow_prop)] = 0
# Can't borrow more than total no. observations.
borrow_prop[(np.isnan(borrow_prop))
| (tpooled_tests == 0)
| (tpooled_ptests == 0)] = 0
# Can't borrow more than total no. observations in the parent state
# Relies on the fact that np.inf > 1
borrow_prop[borrow_prop > 1] = 1
return borrow_prop


def raw_positive_prop(positives, tests, min_obs):
"""
Calculate the proportion of positive tests without any temporal smoothing.
Expand Down
68 changes: 36 additions & 32 deletions quidel_covidtest/delphi_quidel_covidtest/generate_sensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@
smoothed_tests_per_device,
raw_tests_per_device,
remove_null_samples)
from .geo_maps import add_megacounties

MIN_OBS = 50 # minimum number of observations in order to compute a proportion.
POOL_DAYS = 7
from .constants import (MIN_OBS, POOL_DAYS)

def generate_sensor_for_nonparent_geo(state_groups, res_key, smooth, device, first_date, last_date):
def generate_sensor_for_nonparent_geo(state_groups, res_key, smooth, device,
first_date, last_date, suffix):
"""
Fit over geo resolutions that don't use a parent state (nation/hhs/state).

Expand All @@ -21,6 +22,8 @@ def generate_sensor_for_nonparent_geo(state_groups, res_key, smooth, device, fir
Consider raw or smooth
device: bool
Consider test_per_device or pct_positive
suffix: str
Indicate the age group
Returns:
df: pd.DataFrame
"""
Expand All @@ -35,27 +38,27 @@ def generate_sensor_for_nonparent_geo(state_groups, res_key, smooth, device, fir
# smoothed test per device
if device & smooth:
stat, se, sample_size = smoothed_tests_per_device(
devices=state_group["numUniqueDevices"].values,
tests=state_group['totalTest'].values,
devices=state_group[f"numUniqueDevices_{suffix}"].values,
tests=state_group[f'totalTest_{suffix}'].values,
min_obs=MIN_OBS, pool_days=POOL_DAYS)
# raw test per device
elif device & (not smooth):
stat, se, sample_size = raw_tests_per_device(
devices=state_group["numUniqueDevices"].values,
tests=state_group['totalTest'].values,
devices=state_group[f"numUniqueDevices_{suffix}"].values,
tests=state_group[f'totalTest_{suffix}'].values,
min_obs=MIN_OBS)
# smoothed pct positive
elif (not device) & smooth:
stat, se, sample_size = smoothed_positive_prop(
tests=state_group['totalTest'].values,
positives=state_group['positiveTest'].values,
tests=state_group[f'totalTest_{suffix}'].values,
positives=state_group[f'positiveTest_{suffix}'].values,
min_obs=MIN_OBS, pool_days=POOL_DAYS)
stat = stat * 100
# raw pct positive
else:
stat, se, sample_size = raw_positive_prop(
tests=state_group['totalTest'].values,
positives=state_group['positiveTest'].values,
tests=state_group[f'totalTest_{suffix}'].values,
positives=state_group[f'positiveTest_{suffix}'].values,
min_obs=MIN_OBS)
stat = stat * 100

Expand All @@ -68,7 +71,7 @@ def generate_sensor_for_nonparent_geo(state_groups, res_key, smooth, device, fir
return remove_null_samples(state_df)

def generate_sensor_for_parent_geo(state_groups, data, res_key, smooth,
device, first_date, last_date):
device, first_date, last_date, suffix):
"""
Fit over geo resolutions that use a parent state (county/hrr/msa).

Expand All @@ -79,15 +82,16 @@ def generate_sensor_for_parent_geo(state_groups, data, res_key, smooth,
Consider raw or smooth
device: bool
Consider test_per_device or pct_positive
suffix: str
Indicate the age group
Returns:
df: pd.DataFrame
"""
has_parent = True
res_df = pd.DataFrame(columns=["geo_id", "val", "se", "sample_size"])
res_groups = data.groupby(res_key)
loc_list = list(res_groups.groups.keys())
for loc in loc_list:
res_group = res_groups.get_group(loc)
if res_key == "fips": # Add rest-of-state report for county level
data = add_megacounties(data, smooth)
for loc, res_group in data.groupby(res_key):
parent_state = res_group['state_id'].values[0]
try:
parent_group = state_groups.get_group(parent_state)
Expand All @@ -104,41 +108,41 @@ def generate_sensor_for_parent_geo(state_groups, data, res_key, smooth,
if has_parent:
if device:
stat, se, sample_size = smoothed_tests_per_device(
devices=res_group["numUniqueDevices"].values,
tests=res_group['totalTest'].values,
devices=res_group[f"numUniqueDevices_{suffix}"].values,
tests=res_group[f'totalTest_{suffix}'].values,
min_obs=MIN_OBS, pool_days=POOL_DAYS,
parent_devices=res_group["numUniqueDevices_parent"].values,
parent_tests=res_group["totalTest_parent"].values)
parent_devices=res_group[f"numUniqueDevices_{suffix}_parent"].values,
parent_tests=res_group[f"totalTest_{suffix}_parent"].values)
else:
stat, se, sample_size = smoothed_positive_prop(
tests=res_group['totalTest'].values,
positives=res_group['positiveTest'].values,
tests=res_group[f'totalTest_{suffix}'].values,
positives=res_group[f'positiveTest_{suffix}'].values,
min_obs=MIN_OBS, pool_days=POOL_DAYS,
parent_tests=res_group["totalTest_parent"].values,
parent_positives=res_group['positiveTest_parent'].values)
parent_tests=res_group[f"totalTest_{suffix}_parent"].values,
parent_positives=res_group[f'positiveTest_{suffix}_parent'].values)
stat = stat * 100
else:
if device:
stat, se, sample_size = smoothed_tests_per_device(
devices=res_group["numUniqueDevices"].values,
tests=res_group['totalTest'].values,
devices=res_group[f"numUniqueDevices_{suffix}"].values,
tests=res_group[f'totalTest_{suffix}'].values,
min_obs=MIN_OBS, pool_days=POOL_DAYS)
else:
stat, se, sample_size = smoothed_positive_prop(
tests=res_group['totalTest'].values,
positives=res_group['positiveTest'].values,
tests=res_group[f'totalTest_{suffix}'].values,
positives=res_group[f'positiveTest_{suffix}'].values,
min_obs=MIN_OBS, pool_days=POOL_DAYS)
stat = stat * 100
else:
if device:
stat, se, sample_size = raw_tests_per_device(
devices=res_group["numUniqueDevices"].values,
tests=res_group['totalTest'].values,
devices=res_group[f"numUniqueDevices_{suffix}"].values,
tests=res_group[f'totalTest_{suffix}'].values,
min_obs=MIN_OBS)
else:
stat, se, sample_size = raw_positive_prop(
tests=res_group['totalTest'].values,
positives=res_group['positiveTest'].values,
tests=res_group[f'totalTest_{suffix}'].values,
positives=res_group[f'positiveTest_{suffix}'].values,
min_obs=MIN_OBS)
stat = stat * 100

Expand Down
43 changes: 40 additions & 3 deletions quidel_covidtest/delphi_quidel_covidtest/geo_maps.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
"""Contains geographic mapping tools."""
from itertools import product
from functools import reduce

import pandas as pd

from delphi_utils import GeoMapper
from .constants import (AGE_GROUPS, MIN_OBS)

# Base names of the per-age-group data columns. An age-group suffix is
# appended to each name when the columns are aggregated; "population" is
# handled separately and is deliberately not part of this list.
DATA_COLS = ['totalTest', 'numUniqueDevices', 'positiveTest']
GMPR = GeoMapper() # Use geo utils
GEO_KEY_DICT = {
"county": "fips",
Expand All @@ -12,21 +18,52 @@
"hhs": "hhs"
}


def geo_map(geo_res, df):
    """
    Map zip-level data to the requested geographic resolution.

    Args:
        geo_res: str
            Geographic resolution; must be a key of GEO_KEY_DICT.
        df: pd.DataFrame
            Zip-level data. The function works on a copy of it.
            Assumed to contain "zip" and "timestamp" columns plus one
            f"{col}_{agegroup}" column for every col in DATA_COLS and
            agegroup in AGE_GROUPS -- TODO confirm against the caller.
    Returns:
        (data, geo_key): the re-keyed DataFrame and the name of its geo
        column. For resolutions other than state/hhs/nation the frame is
        additionally passed through add_parent_state.
    """
    data = df.copy()
    geo_key = GEO_KEY_DICT[geo_res]
    # Add population for each zipcode
    data = GMPR.add_population_column(data, "zip")
    # zip -> geo_res
    # Columns to carry through the mapping: population plus every
    # (data column, age group) combination.
    data_cols = ["population"] + [
        f"{col}_{agegroup}" for col, agegroup in product(DATA_COLS, AGE_GROUPS)
    ]
    data = GMPR.replace_geocode(
        data, from_code="zip", new_code=geo_key, date_col="timestamp",
        data_cols=data_cols)
    # state/hhs/nation are returned without a parent-state column.
    if geo_res in ["state", "hhs", "nation"]:
        return data, geo_key
    # Add parent state
    data = add_parent_state(data, geo_res, geo_key)
    return data, geo_key

def add_megacounties(data, smooth=False):
    """
    Add megacounty rows to a county-level report.

    For each age group in AGE_GROUPS the county rows are passed to
    GMPR.fips_to_megacounty, using that age group's totalTest column as the
    threshold column. Only the resulting rows whose FIPS code ends in "000"
    are kept. The per-age-group frames are merged on ("timestamp", "fips"),
    given a state_id column, and appended to the input data.

    Args:
        data: pd.DataFrame
            County-level data. Must contain "fips" and "timestamp" columns
            and one f"{col}_{agegroup}" column for every col in DATA_COLS
            and agegroup in AGE_GROUPS.
        smooth: bool
            If True the threshold is MIN_OBS/2, otherwise MIN_OBS.
    Returns:
        pd.DataFrame: the input rows followed by the megacounty rows.
    Raises:
        AssertionError: if data has no "fips" column.
    """
    assert "fips" in data.columns  # Make sure the data is at county level

    # For raw signals, the threshold is MIN_OBS
    # For smoothed signals, the threshold is MIN_OBS/2
    # NOTE(review): a reviewer asked "What's the reason for this change in
    # thresholding here?" -- the rationale for halving is not recorded here.
    threshold_visits = MIN_OBS/2 if smooth else MIN_OBS

    mega_frames = []
    for agegroup in AGE_GROUPS:
        data_cols = [f"{col}_{agegroup}" for col in DATA_COLS]
        df = GMPR.fips_to_megacounty(data[data_cols + ["timestamp", "fips"]],
                                     threshold_visits, 1, fips_col="fips",
                                     thr_col=f"totalTest_{agegroup}",
                                     date_col="timestamp")
        df.rename({"megafips": "fips"}, axis=1, inplace=True)
        # Keep only the megacounty rows (FIPS codes ending in "000").
        mega_frames.append(df[df.fips.str.endswith("000")])

    # pd.merge defaults to an inner join, so a (timestamp, fips) megacounty
    # row survives only if it is present for every age group.
    mega_df = reduce(
        lambda left, right: pd.merge(left, right, on=["timestamp", "fips"]),
        mega_frames)
    mega_df = GMPR.add_geocode(mega_df, from_code="fips", new_code="state_id",
                               from_col="fips", new_col="state_id")

    return pd.concat([data, mega_df])

def add_parent_state(data, geo_res, geo_key):
"""
Expand Down
Loading