Skip to content

Commit 87f5234

Browse files
authored
Merge pull request #226 from cmu-delphi/rf_geo
Rf geo final
2 parents 73308d9 + 88fa1e6 commit 87f5234

File tree

6 files changed

+57
-57
lines changed

6 files changed

+57
-57
lines changed

_delphi_utils_python/delphi_utils/geomap.py

Lines changed: 27 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -51,16 +51,17 @@ class GeoMapper:
5151
+ county -> state : unweighted
5252
+ county -> msa : unweighted
5353
+ county -> megacounty
54+
+ county -> hrr
55+
+ county -> zip
5456
- zip -> state
5557
- zip -> msa
56-
- fips -> hrr
5758
5859
Geotypes (listed by default column name):
5960
zip: zip5, length 5 str of 0-9 with leading 0's
6061
fips: county, length 5 str of 0-9 with leading 0's
6162
msa: metro stat area, length 5 str of 0-9 with leading 0's
62-
st_code: state code, str of 0-9
63-
state_id: state id, str of A-Z
63+
st_code: state code, str in 1-99
64+
state_id: state id, str in AA-ZZ
6465
hrr: hrr id, int 1-500
6566
"""
6667

@@ -147,11 +148,26 @@ def load_jhu_uid_fips_cross(self):
147148
'weight': float})
148149
self.jhu_uid_fips_cross = GeoMapper.convert_int_to_str5(self.jhu_uid_fips_cross, int_col='fips', str_col='fips')
149150

151+
@staticmethod
152+
def convert_int_to_str5(data,
153+
int_col='fips',
154+
str_col='fips'):
155+
"""convert int to a string of length 5"""
156+
data = data.copy()
157+
data[str_col] = data[int_col].astype(str).str.zfill(5)
158+
return data
150159

151-
def convert_fips_to_stcode(self,
160+
def convert_intfips_to_str(self,
152161
data,
153-
fips_col='fips',
154-
stcode_col='st_code'):
162+
intfips_col='fips',
163+
strfips_col='fips'):
164+
"""convert fips to a string of length 5"""
165+
return GeoMapper.convert_int_to_str5(data,int_col=intfips_col,str_col=strfips_col)
166+
167+
def convert_fips_to_stcode(self,
168+
data: pd.DataFrame,
169+
fips_col: str = 'fips',
170+
stcode_col: str = 'st_code'):
155171
"""create st_code column from fips column
156172
157173
Args:
@@ -168,23 +184,6 @@ def convert_fips_to_stcode(self,
168184
data[stcode_col] = data[fips_col].str[:2]
169185
return data
170186

171-
@staticmethod
172-
def convert_int_to_str5(data,
173-
int_col='fips',
174-
str_col='fips'):
175-
"""convert int to a string of length 5"""
176-
data = data.copy()
177-
data[str_col] = data[int_col].astype(str).str.zfill(5)
178-
return data
179-
180-
def convert_intfips_to_str(self,
181-
data,
182-
intfips_col='fips',
183-
strfips_col='fips'):
184-
"""convert fips to a string of length 5"""
185-
return GeoMapper.convert_int_to_str5(data,int_col=intfips_col,str_col=strfips_col)
186-
187-
188187
def convert_stcode_to_state_id(self,
189188
data,
190189
stcode_col='st_code',
@@ -365,16 +364,11 @@ def county_to_state(self,
365364
if count_cols:
366365
data = data[[fips_col,date_col] + count_cols].copy()
367366
data = self.convert_fips_to_state_id(data,fips_col=fips_col,state_id_col=state_id_col,full=full)
368-
data.dropna(subset=['st_code'], axis=0, inplace=True)
367+
data.dropna(subset=[state_id_col], axis=0, inplace=True)
369368
data.drop([fips_col,'st_code'],axis=1,inplace=True)
370-
# assert not data[state_id_col].isnull().values.any(), "nan states, probably invalid fips"
371-
if date_col:
372-
assert not data[date_col].isnull().values.any(), "nan dates not allowed"
373-
data.fillna(0,inplace=True)
374-
data = data.groupby([date_col,state_id_col]).sum()
375-
else:
376-
data.fillna(0,inplace=True)
377-
data = data.groupby(state_id_col).sum()
369+
assert not data[date_col].isnull().values.any(), "nan dates not allowed"
370+
# data.fillna(0,inplace=True)
371+
data = data.groupby([date_col,state_id_col]).sum()
378372
return data.reset_index()
379373

380374
def county_to_msa(self,
@@ -828,4 +822,4 @@ def county_to_hrr(self,
828822
zip_col = "_zip_col_temp"
829823
data = self.county_to_zip(data, fips_col=fips_col, zip_col=zip_col, date_col=date_col, count_cols=count_cols)
830824
data = self.zip_to_hrr(data, zip_col=zip_col, date_col=date_col, count_cols=count_cols, hrr_col=hrr_col)
831-
return data.astype(dtype={hrr_col: int})
825+
return data.astype(dtype={hrr_col: int})

_delphi_utils_python/tests/test_geomap.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
import pytest
2-
import os
1+
from delphi_utils.geomap import GeoMapper
32
import pandas as pd
43
import numpy as np
5-
from delphi_utils.geomap import GeoMapper
64

75

86
class TestGeoMapper:

jhu/delphi_jhu/geo.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def geo_map(df: pd.DataFrame, geo_res: str, sensor: str):
3737
elif geo_res == "msa":
3838
df = gmpr.county_to_msa(df, fips_col="fips", msa_col="geo_id", date_col="timestamp")
3939
df['geo_id'] = df['geo_id'].astype(int)
40+
print(df[df['population']==0])
4041
elif geo_res == 'hrr':
4142
df = gmpr.county_to_hrr(df, fips_col="fips", hrr_col="geo_id", date_col="timestamp")
4243
df['geo_id'] = df['geo_id'].astype(int)

jhu/delphi_jhu/pull.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# -*- coding: utf-8 -*-
2-
import numpy as np
3-
import pandas as pd
4-
import re
52

63
from delphi_utils import GeoMapper
4+
import pandas as pd
5+
import numpy as np
6+
import re
77

88
def detect_date_col(col_name: str):
99
"""determine if column name is a date"""

jhu/tests/compare_receiving.py

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,33 +5,40 @@
55
import pandas as pd
66
import os
77

8-
def load_files():
8+
rec_pattern = ""
9+
# rec_pattern = "county_deaths_incidence_num"
10+
11+
def load_files(pattern = "", num = 1000):
912
rec_dir = os.listdir('../receiving')
10-
rec_stable_dir = os.listdir('../receiving_rf')
11-
rec_common = set(rec_dir) & set(rec_stable_dir)
13+
suff = "stable"
14+
rec_stable_dir = os.listdir(f'../receiving_{suff}')
15+
rec_common = list(set(rec_dir) & set(rec_stable_dir))
1216
print(set(rec_dir).symmetric_difference(rec_stable_dir))
17+
num_iter = 0
1318
for rec in rec_common:
14-
df_rec = pd.read_csv(f'../receiving/{rec}').set_index('geo_id')
15-
df_stable = pd.read_csv(f'../receiving_rf/{rec}').set_index('geo_id')
16-
try:
17-
df_join = df_rec.join(df_stable, rsuffix='_stable' )
18-
except:
19-
print(df_rec.info())
20-
print(df_stable.info())
21-
assert False, f"failed join on {rec}"
22-
yield rec, df_join
19+
if num_iter <= num:
20+
num_iter += 1
21+
df_rec = pd.read_csv(f'../receiving/{rec}').set_index('geo_id')
22+
df_stable = pd.read_csv(f'../receiving_{suff}/{rec}').set_index('geo_id')
23+
try:
24+
df_join = df_rec.join(df_stable, rsuffix='_stable' )
25+
except:
26+
print(df_rec.info())
27+
print(df_stable.info())
28+
assert False, f"failed join on {rec}"
29+
yield rec, df_join
2330

2431
def main():
25-
load_iter = load_files()
32+
load_iter = load_files(rec_pattern)
2633
for rec, df in load_iter:
2734
if ('msa' in rec) and False:
2835
msa_ds = (df['val'] - df['val_stable']).sum()
2936
print(f'{msa_ds} value diff')
30-
if (df.eval('abs(val - val_stable)').sum() > 0.01) and ("county" in rec):
37+
if (df.eval('abs(val - val_stable)').sum() > 0.01):
3138
print(f'Printing {rec} difference')
3239
df_diff = df[df.eval('val != val_stable')]
33-
print(df_diff.head())
3440
print(df_diff.shape)
41+
df_diff.to_csv(f'rec_diffs/diff_{rec}.csv')
3542
# assert "county_confirmed_7dav_incidence_num" not in rec, f"{rec}!!!"
3643
#input('w')
3744

jhu/tests/test_smooth.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,16 @@
1010
class TestSmooth:
1111
def test_output_files_smoothed(self, run_as_module):
1212

13-
dates = [str(x) for x in range(20200304, 20200311)]
13+
dates = [str(x) for x in range(20200701, 20200730)]
1414

1515
smoothed = pd.read_csv(
16-
join("receiving",
16+
join("../receiving",
1717
f"{dates[-1]}_state_confirmed_7dav_cumulative_num.csv")
1818
)
1919

2020
raw = pd.concat([
2121
pd.read_csv(
22-
join("receiving",
22+
join("../receiving",
2323
f"{date}_state_confirmed_cumulative_num.csv")
2424
) for date in dates
2525
])

0 commit comments

Comments (0)