Skip to content

Commit a627e1c

Browse files
authored
Merge pull request #573 from cmu-delphi/safegraph-hrr-msa
Add HRR and MSA to safegraph
2 parents 732d696 + f6d8fc2 commit a627e1c

File tree

3 files changed

+48
-6
lines changed

3 files changed

+48
-6
lines changed

safegraph/delphi_safegraph/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,6 @@
1515
GEO_RESOLUTIONS = [
1616
'county',
1717
'state',
18+
'msa',
19+
'hrr'
1820
]

safegraph/delphi_safegraph/process.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -123,17 +123,31 @@ def aggregate(df, signal_names, geo_resolution='county'):
123123
signals, standard errors, and sample sizes.
124124
"""
125125
# Prepare geo resolution
126+
gmpr = GeoMapper()
126127
if geo_resolution == 'county':
127128
geo_transformed_df = df.copy()
128129
geo_transformed_df['geo_id'] = df['county_fips']
129130
elif geo_resolution == 'state':
130-
gmpr = GeoMapper()
131131
geo_transformed_df = gmpr.add_geocode(df,
132-
from_col='county_fips',
133-
from_code='fips',
134-
new_code='state_id',
135-
new_col='geo_id',
136-
dropna=False)
132+
from_col='county_fips',
133+
from_code='fips',
134+
new_code='state_id',
135+
new_col='geo_id',
136+
dropna=False)
137+
elif geo_resolution == 'msa':
138+
geo_transformed_df = gmpr.add_geocode(df,
139+
from_col='county_fips',
140+
from_code='fips',
141+
new_code='msa',
142+
new_col='geo_id',
143+
dropna=False)
144+
elif geo_resolution == 'hrr':
145+
geo_transformed_df = gmpr.add_geocode(df,
146+
from_col='county_fips',
147+
from_code='fips',
148+
new_code='hrr',
149+
new_col='geo_id',
150+
dropna=False)
137151
else:
138152
raise ValueError(
139153
f'`geo_resolution` must be one of {GEO_RESOLUTIONS}.')

safegraph/tests/test_process.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ def test_aggregate_county(self):
4242
assert np.all(df[f'{SIGNALS[0]}_n'].values > 0)
4343
x = df[f'{SIGNALS[0]}_se'].values
4444
assert np.all(x[~np.isnan(x)] >= 0)
45+
assert df.shape == (1472, 17)
4546

4647
def test_aggregate_state(self):
4748
"""Tests that aggregation at the state level creates non-zero-valued
@@ -53,6 +54,31 @@ def test_aggregate_state(self):
5354
assert np.all(df[f'{SIGNALS[0]}_n'].values > 0)
5455
x = df[f'{SIGNALS[0]}_se'].values
5556
assert np.all(x[~np.isnan(x)] >= 0)
57+
assert df.shape == (52, 17)
58+
59+
def test_aggregate_msa(self):
60+
"""Tests that aggregation at the MSA level creates non-zero-valued
61+
signals."""
62+
cbg_df = construct_signals(pd.read_csv('raw_data/sample_raw_data.csv'),
63+
SIGNALS)
64+
df = aggregate(cbg_df, SIGNALS, 'msa')
65+
66+
assert np.all(df[f'{SIGNALS[0]}_n'].values > 0)
67+
x = df[f'{SIGNALS[0]}_se'].values
68+
assert np.all(x[~np.isnan(x)] >= 0)
69+
assert df.shape == (372, 17)
70+
71+
def test_aggregate_hrr(self):
72+
"""Tests that aggregation at the HRR level creates non-zero-valued
73+
signals."""
74+
cbg_df = construct_signals(pd.read_csv('raw_data/sample_raw_data.csv'),
75+
SIGNALS)
76+
df = aggregate(cbg_df, SIGNALS, 'hrr')
77+
78+
assert np.all(df[f'{SIGNALS[0]}_n'].values > 0)
79+
x = df[f'{SIGNALS[0]}_se'].values
80+
assert np.all(x[~np.isnan(x)] >= 0)
81+
assert df.shape == (306, 17)
5682

5783
def test_files_in_past_week(self):
5884
"""Tests that `files_in_past_week()` finds the file names corresponding

0 commit comments

Comments
 (0)