Skip to content

Commit fe80833

Browse files
authored
Merge pull request #538 from cmu-delphi/all-deploys
Sync all deploy branches into main in prep for generating prod branch
2 parents 6a7fd65 + 017d96e commit fe80833

File tree

9 files changed

+163
-111
lines changed

9 files changed

+163
-111
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"static_file_dir": "./static",
3+
"export_dir": "/common/covidcast/receiving/quidel",
4+
"cache_dir": "./cache",
5+
"export_start_date": "2020-05-26",
6+
"export_end_date": "",
7+
"pull_start_date": "2020-05-26",
8+
"pull_end_date": "",
9+
"mail_server": "{{ quidel_datadrop_mail_server }}",
10+
"account": "{{ quidel_datadrop_account }}",
11+
"password": "{{ quidel_datadrop_password }}",
12+
"sender": "{{ quidel_datadrop_sender }}",
13+
"wip_signal": [""],
14+
"mode": ""
15+
}

jenkins/quidel-jenkins-build.sh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Jenkins build
4+
#
5+
6+
set -eo pipefail
7+
source ~/.bash_profile
8+
9+
#
10+
# Build
11+
#
12+
13+
local_indicator="quidel_covidtest"
14+
15+
cd "${WORKSPACE}/${local_indicator}" || exit
16+
17+
# Set up venv
18+
python -m venv env
19+
source env/bin/activate
20+
pip install ../_delphi_utils_python/.
21+
pip install .

jenkins/quidel-jenkins-deploy.sh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Jenkins deploy
4+
#
5+
6+
set -eo pipefail
7+
source ~/.bash_profile
8+
9+
#
10+
# Deploy
11+
#
12+
13+
local_indicator="quidel_covidtest"
14+
15+
cd "${WORKSPACE}/ansible" || exit
16+
17+
# Ansible!
18+
ansible-playbook ansible-deploy.yaml --extra-vars "indicator=${local_indicator}" -i inventory

jenkins/quidel-jenkins-package.sh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Jenkins package
4+
#
5+
6+
set -eo pipefail
7+
source ~/.bash_profile
8+
9+
#
10+
# Package
11+
#
12+
13+
local_indicator="quidel_covidtest"
14+
15+
cd "${WORKSPACE}" || exit
16+
17+
# Create .tar.gz for deployment
18+
tar -czvf "${JENKINS_HOME}/artifacts/${local_indicator}.tar.gz" "${local_indicator}"

jenkins/quidel-jenkins-test.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Jenkins test
4+
#
5+
6+
set -eo pipefail
7+
source ~/.bash_profile
8+
9+
#
10+
# Test
11+
#
12+
13+
local_indicator="quidel_covidtest"
14+
15+
cd "${WORKSPACE}/${local_indicator}" || exit
16+
17+
# Linter
18+
env/bin/pylint delphi_"${local_indicator}"
19+
20+
# Unit tests and code coverage
21+
cd tests || exit && \
22+
../env/bin/pytest --cov=delphi_"${local_indicator}" --cov-report=term-missing

quidel_covidtest/.pylintrc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,5 @@ attr-rgx=[a-z_][a-z0-9_]*
1919
[DESIGN]
2020

2121
# Don't complain about pytest "unused" arguments.
22-
ignored-argument-names=(_.*|run_as_module)
22+
ignored-argument-names=(_.*|run_as_module)
23+
disable=R0801, C0330, E1101, E0611, C0114, C0116, C0103, R0913, R0914, W0702, W0212, E1136
Lines changed: 44 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,77 +1,49 @@
11
"""Contains geographic mapping tools."""
2-
3-
def zip_to_msa(data, map_df):
4-
"""Map from zipcode to MSA (along with parent state).
5-
Args:
6-
data: dataframe at the day-zip resolution.
7-
Returns:
8-
tuple, a dataframe at day-msa, with parent state column, and their string keys
2+
from delphi_utils import GeoMapper
3+
4+
date_col = "timestamp"
5+
data_cols = ['totalTest', 'numUniqueDevices', 'positiveTest', "population"]
6+
gmpr = GeoMapper() # Use geo utils
7+
GEO_KEY_DICT = {
8+
"county": "fips",
9+
"msa": "msa",
10+
"hrr": "hrr",
11+
"state": "state_id"
12+
}
13+
def geo_map(geo_res, df):
14+
data = df.copy()
15+
geo_key = GEO_KEY_DICT[geo_res]
16+
# Add population for each zipcode
17+
data = gmpr.add_population_column(data, "zip")
18+
# zip -> geo_res
19+
data = gmpr.replace_geocode(data, "zip", geo_key,
20+
date_col=date_col, data_cols=data_cols)
21+
if geo_res == "state":
22+
return data
23+
# Add parent state
24+
data = add_parent_state(data, geo_res, geo_key)
25+
return data, geo_key
26+
27+
def add_parent_state(data, geo_res, geo_key):
928
"""
10-
# zip -> msa
11-
zip_map = map_df[["zip", "cbsa_id"]].dropna().drop_duplicates()
12-
# forget about the rest of the zips that aren't in MSA
13-
data = data.merge(zip_map, how="left", on="zip").dropna().drop(columns=["zip"], axis=1)
14-
15-
# msa + parent state
16-
# msa_map has mapping from msa to state, going by the state with the largest
17-
# population (since a msa may span multiple states)
18-
msa_map = map_df[["cbsa_id", "state_id", "population"]]
19-
msa_map = msa_map.groupby(["cbsa_id"]).max().reset_index()
20-
data = data.merge(msa_map, how="left", on="cbsa_id").drop(
21-
columns=["population"]).dropna()
22-
data = data.groupby(["timestamp", "cbsa_id", "state_id"]).sum().reset_index()
23-
data["cbsa_id"] = data["cbsa_id"].apply(lambda x: str(int(x)).zfill(5))
24-
25-
return data, "cbsa_id"
26-
27-
def zip_to_hrr(data, map_df):
28-
"""Map from zipcode to HRR (along with parent state).
29-
Args:
30-
data: dataframe at the day-zip resolution.
31-
Returns:
32-
tuple, a dataframe at day-msa, with parent state column, and their string keys
29+
- map from msa/hrr to state, going by the state with the largest
30+
population (since a msa/hrr may span multiple states)
31+
- map from county to the corresponding state
3332
"""
34-
# zip -> msa
35-
zip_map = map_df[["zip", "hrrnum"]].dropna().drop_duplicates()
36-
# forget about the rest of the zips that aren't in MSA
37-
data = data.merge(zip_map, how="left", on="zip").dropna().drop(columns=["zip"], axis=1)
38-
39-
# msa + parent state
40-
# msa_map has mapping from msa to state, going by the state with the largest
41-
# population (since a msa may span multiple states)
42-
msa_map = map_df[["hrrnum", "state_id", "population"]]
43-
msa_map = msa_map.groupby(["hrrnum"]).max().reset_index()
44-
data = data.merge(msa_map, how="left", on="hrrnum").drop(
33+
fips_to_state = gmpr._load_crosswalk(from_code="fips", to_code="state")
34+
if geo_res == "county":
35+
mix_map = fips_to_state[["fips", "state_id"]]
36+
else:
37+
fips_to_geo_res = gmpr._load_crosswalk(from_code="fips", to_code=geo_res)
38+
mix_map = fips_to_geo_res[["fips", geo_res]].merge(
39+
fips_to_state[["fips", "state_id"]],
40+
on="fips",
41+
how="inner")
42+
mix_map = gmpr.add_population_column(mix_map, "fips").groupby(
43+
geo_res).max().reset_index().drop(
44+
["fips", "population"], axis = 1)
45+
# Merge the info of parent state to the data
46+
data = data.merge(mix_map, how="left", on=geo_key).drop(
4547
columns=["population"]).dropna()
46-
data = data.groupby(["timestamp", "hrrnum", "state_id"]).sum().reset_index()
47-
data["hrrnum"] = data["hrrnum"].astype(int)
48-
49-
return data, "hrrnum"
50-
51-
def zip_to_county(data, map_df):
52-
"""Aggregate zip codes to the county resolution, along with its parent state.
53-
Args:
54-
data: dataframe aggregated to the day-zip resolution
55-
Returns:
56-
dataframe at the day-county resolution and parent state, with their string keys
57-
"""
58-
# zip -> county + parent state (county has unique state)
59-
zip_map = map_df[["fips", "zip", "state_id"]].dropna().drop_duplicates()
60-
data = data.merge(zip_map, how="left", on="zip").drop(columns=["zip"]).dropna()
61-
data = data.groupby(["timestamp", "fips", "state_id"]).sum().reset_index()
62-
data["fips"] = data["fips"].apply(lambda x: str(int(x)).zfill(5))
63-
64-
return data, "fips"
65-
66-
def zip_to_state(data, map_df):
67-
"""Aggregate zip codes to the state resolution.
68-
Args:
69-
data: dataframe aggregated to the day-zip resolution
70-
Returns:
71-
dataframe at the day-state resolution, with the state key
72-
"""
73-
zip_map = map_df[["zip", "state_id"]].dropna().drop_duplicates()
74-
data = data.merge(zip_map, how="left", on="zip").drop(
75-
columns=["zip"]).dropna()
76-
data = data.groupby(["timestamp", "state_id"]).sum().reset_index()
48+
data = data.groupby(["timestamp", geo_key, "state_id"]).sum().reset_index()
7749
return data

quidel_covidtest/delphi_quidel_covidtest/run.py

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,9 @@
44
This module should contain a function called `run_module`, that is executed
55
when the module is run with `python -m MODULE_NAME`.
66
"""
7-
from os.path import join
8-
9-
import pandas as pd
107
from delphi_utils import read_params, add_prefix
118

12-
from .geo_maps import (zip_to_msa, zip_to_hrr, zip_to_county, zip_to_state)
9+
from .geo_maps import geo_map
1310
from .pull import (pull_quidel_covidtest,
1411
check_export_start_date,
1512
check_export_end_date,
@@ -20,21 +17,16 @@
2017
from .constants import (END_FROM_TODAY_MINUS, EXPORT_DAY_RANGE,
2118
SMOOTHED_POSITIVE, RAW_POSITIVE,
2219
SMOOTHED_TEST_PER_DEVICE, RAW_TEST_PER_DEVICE,
23-
GEO_RESOLUTIONS, SENSORS, SMOOTHERS,
24-
COUNTY, MSA)
20+
GEO_RESOLUTIONS, SENSORS, SMOOTHERS)
2521

2622

2723
def run_module():
2824
"""Run the quidel_covidtest indicator."""
2925
params = read_params()
3026
cache_dir = params["cache_dir"]
3127
export_dir = params["export_dir"]
32-
static_file_dir = params["static_file_dir"]
3328
export_start_date = params["export_start_date"]
3429
export_end_date = params["export_end_date"]
35-
map_df = pd.read_csv(
36-
join(static_file_dir, "fips_prop_pop.csv"), dtype={"fips": int}
37-
)
3830

3931
# Pull data and update export date
4032
df, _end_date = pull_quidel_covidtest(params)
@@ -50,7 +42,7 @@ def run_module():
5042

5143
# State Level
5244
data = df.copy()
53-
state_groups = zip_to_state(data, map_df).groupby("state_id")
45+
state_groups = geo_map("state", data).groupby("state_id")
5446

5547
# Add prefix, if required
5648
sensors = add_prefix(SENSORS,
@@ -76,21 +68,15 @@ def run_module():
7668
export_csv(state_df, "state", sensor, receiving_dir=export_dir,
7769
start_date=export_start_date, end_date=export_end_date)
7870

79-
# County/HRR/MSA level
80-
for geo_res in GEO_RESOLUTIONS:
71+
# County/HRR/MSA level
72+
for geo_res in GEO_RESOLUTIONS:
73+
geo_data, res_key = geo_map(geo_res, data)
74+
for sensor in sensors:
8175
print(geo_res, sensor)
82-
data = df.copy()
83-
if geo_res == COUNTY:
84-
data, res_key = zip_to_county(data, map_df)
85-
elif geo_res == MSA:
86-
data, res_key = zip_to_msa(data, map_df)
87-
else:
88-
data, res_key = zip_to_hrr(data, map_df)
89-
9076
res_df = generate_sensor_for_other_geores(
91-
state_groups, data, res_key, smooth=smoothers[sensor][1],
92-
device=smoothers[sensor][0], first_date=first_date,
93-
last_date=last_date)
77+
state_groups, geo_data, res_key, smooth=smoothers[sensor][1],
78+
device=smoothers[sensor][0], first_date=first_date,
79+
last_date=last_date)
9480
export_csv(res_df, geo_res, sensor, receiving_dir=export_dir,
9581
start_date=export_start_date, end_date=export_end_date)
9682

quidel_covidtest/tests/test_geo_maps.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,9 @@
66
import numpy as np
77

88

9-
from delphi_quidel_covidtest.geo_maps import (zip_to_msa, zip_to_hrr,
10-
zip_to_county, zip_to_state)
9+
from delphi_quidel_covidtest.geo_maps import geo_map
1110

1211

13-
map_df = pd.read_csv(
14-
join("../static", "fips_prop_pop.csv"), dtype={"fips": int}
15-
)
16-
1712
class TestGeoMap:
1813
def test_county(self):
1914

@@ -24,10 +19,11 @@ def test_county(self):
2419
"2020-06-15", "2020-06-15", "2020-06-15"],
2520
"totalTest": [100, 50, 200, 200, 250, 500],
2621
"positiveTest": [10, 8, 15, 5, 20, 50],
22+
"numUniqueDevices": [2, 1, 1, 1, 1, 1]
2723
}
2824
)
2925

30-
new_df, res_key = zip_to_county(df, map_df)
26+
new_df, res_key = geo_map("county", df)
3127

3228
assert res_key == 'fips'
3329
assert set(new_df["fips"].values) == set(['25027', '53011', '48439'])
@@ -44,10 +40,11 @@ def test_state(self):
4440
"2020-06-15", "2020-06-15", "2020-06-15"],
4541
"totalTest": [100, 50, 200, 200, 250, 500],
4642
"positiveTest": [10, 8, 15, 5, 20, 50],
43+
"numUniqueDevices": [2, 1, 1, 1, 1, 1]
4744
}
4845
)
4946

50-
new_df = zip_to_state(df, map_df)
47+
new_df = geo_map("state", df)
5148

5249
assert set(new_df["state_id"].values) == set(['ma', 'tx', 'wa'])
5350
assert set(new_df["timestamp"].values) == set(df["timestamp"].values)
@@ -63,12 +60,13 @@ def test_hrr(self):
6360
"2020-06-15", "2020-06-15", "2020-06-15"],
6461
"totalTest": [100, 50, 200, 200, 250, 500],
6562
"positiveTest": [10, 8, 15, 5, 20, 50],
63+
"numUniqueDevices": [2, 1, 1, 1, 1, 1]
6664
}
6765
)
6866

69-
new_df, res_key = zip_to_hrr(df, map_df)
67+
new_df, res_key = geo_map("hrr", df)
7068

71-
assert set(new_df["hrrnum"].values) == set([16, 231, 340, 344, 394])
69+
assert set(new_df["hrr"].values) == set(["16", "231", "340", "344", "394"])
7270
assert set(new_df["timestamp"].values) == set(df["timestamp"].values)
7371
assert set(new_df["totalTest"].values) == set([500, 100, 250, 50, 400])
7472
assert set(new_df["positiveTest"].values) == set([50, 10, 20, 8, 20])
@@ -77,18 +75,19 @@ def test_msa(self):
7775

7876
df = pd.DataFrame(
7977
{
80-
"zip": [1607, 73716, 73719, 76010, 74435, 74936],
78+
"zip": [1607, 73716, 73719, 76010, 74945, 74936],
8179
"timestamp": ["2020-06-15", "2020-06-15", "2020-06-15",
8280
"2020-06-15", "2020-06-15", "2020-06-15"],
8381
"totalTest": [100, 50, 200, 200, 250, 500],
8482
"positiveTest": [10, 8, 15, 5, 20, 50],
83+
"numUniqueDevices": [2, 1, 1, 1, 1, 1]
8584
}
8685
)
8786

88-
new_df, res_key = zip_to_msa(df, map_df)
87+
new_df, res_key = geo_map("msa", df)
8988

90-
assert res_key == 'cbsa_id'
91-
assert set(new_df["cbsa_id"].values) == set(['19100', '22900', '49340'])
89+
assert res_key == 'msa'
90+
assert set(new_df["msa"].values) == set(['19100', '22900', '49340'])
9291
assert set(new_df["timestamp"].values) == set(df["timestamp"].values)
9392
assert set(new_df["totalTest"].values) == set([200, 750, 100])
9493
assert set(new_df["positiveTest"].values) == set([5, 70, 10])

0 commit comments

Comments
 (0)