Skip to content

Commit 5b41311

Browse files
authored
clean state column (#2105)
* in progress
* filtering also on hhs and check in test
* regenerated test data to update current behavior
* lint
1 parent 049de70 commit 5b41311

File tree

4 files changed

+125
-1
lines changed

4 files changed

+125
-1
lines changed

nhsn/delphi_nhsn/run.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,17 @@ def run_module(params, logger=None):
7373
df = df[df["geo_id"] == "us"]
7474
elif geo == "hhs":
7575
df = df[df["geo_id"] != "us"]
76+
df = df[df["geo_id"].str.len() == 2]
7677
df.rename(columns={"geo_id": "state_id"}, inplace=True)
7778
df = geo_mapper.add_geocode(df, "state_id", "state_code", from_col="state_id")
7879
df = geo_mapper.add_geocode(df, "state_code", "hhs", from_col="state_code", new_col="hhs")
7980
df = geo_mapper.replace_geocode(
8081
df, from_col="state_code", from_code="state_code", new_col="geo_id", new_code="hhs"
8182
)
82-
else:
83+
elif geo == "state":
8384
df = df[df_pull["geo_id"] != "us"]
85+
df = df[df["geo_id"].str.len() == 2] # hhs region is a value in geo_id column
86+
8487
df["se"] = np.nan
8588
df["sample_size"] = np.nan
8689
dates = create_export_csv(

nhsn/tests/test_data/20241212.csv.gz

21 Bytes
Binary file not shown.

nhsn/tests/test_data/page.json

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2122,5 +2122,123 @@
21222122
"pctconfc19icubedsperchos": "-0.0002",
21232123
"pctconffluicubedsperchos": "-0.012",
21242124
"pctconfrsvicubedsperchos": "0.0"
2125+
},
2126+
{
2127+
"weekendingdate": "2021-10-16T00:00:00.000",
2128+
"jurisdiction": "region 1",
2129+
"numinptbeds": "729772.05",
2130+
"numinptbedsadult": "660758.41",
2131+
"numinptbedsped": "53387.68",
2132+
"numinptbedsocc": "556817.29",
2133+
"numinptbedsoccadult": "510524.46",
2134+
"numinptbedsoccped": "35679.97",
2135+
"numicubeds": "114471.4",
2136+
"numicubedsadult": "86925.11",
2137+
"numicubedsped": "21902.45",
2138+
"numicubedsocc": "85344.84",
2139+
"numicubedsoccadult": "66877.69",
2140+
"numicubedsoccped": "14657.22",
2141+
"numconfc19hosppatsadult": "55448.05",
2142+
"numconfc19hosppatsped": "847.15",
2143+
"totalconfc19hosppats": "56295.2",
2144+
"totalconffluhosppats": "259.85",
2145+
"numconfc19icupatsadult": "18104.6",
2146+
"totalconfc19icupats": "18104.6",
2147+
"totalconffluicupats": "39.33",
2148+
"totalconfc19newadmped": "1301.0",
2149+
"numconfc19newadmadult18to49": "11532.0",
2150+
"totalconfc19newadmadult": "44677.0",
2151+
"numconfc19newadmunk": "1982.0",
2152+
"totalconfc19newadm": "45978.0",
2153+
"totalconfflunewadm": "266.0",
2154+
"pctinptbedsocc": "0.763",
2155+
"pctconfc19inptbeds": "0.0771",
2156+
"pctconffluinptbeds": "0.0004",
2157+
"pcticubedsocc": "0.7456",
2158+
"pctconfc19icubeds": "0.1582",
2159+
"pctconffluicubeds": "0.0003",
2160+
"pctconfc19newadmadult": "0.9717",
2161+
"pctconfc19newadmped": "0.0283",
2162+
"numinptbedshosprep": "5396",
2163+
"numinptbedsocchosprep": "5396",
2164+
"numicubedshosprep": "5396",
2165+
"numicubedsocchosprep": "5396",
2166+
"totalconfc19hosppatshosprep": "5396",
2167+
"totalconffluhosppatshosprep": "4317",
2168+
"totalconfrsvhosppatshosprep": "0",
2169+
"totalconfc19icupatshosprep": "5396",
2170+
"totalconffluicupatshosprep": "4306",
2171+
"totalconfrsvicupatshosprep": "0",
2172+
"totalconfc19newadmpedhosprep": "5278",
2173+
"totalconfc19newadmadulthosprep": "5394",
2174+
"totalconfc19newadmhosprep": "5394",
2175+
"totalconfflunewadmpedhosprep": "0",
2176+
"totalconfflunewadmadulthosprep": "0",
2177+
"totalconfflunewadmhosprep": "4307",
2178+
"totalconfrsvnewadmpedhosprep": "0",
2179+
"totalconfrsvnewadmadulthosprep": "0",
2180+
"totalconfrsvnewadmhosprep": "0",
2181+
"pctinptbedsocchosprep": "5396",
2182+
"pcticubedsocchosprep": "5396",
2183+
"pctconfc19inptbedshosprep": "5396",
2184+
"pctconffluinptbedshosprep": "4317",
2185+
"pctconfrsvinptbedshosprep": "0",
2186+
"pctconfc19icubedshosprep": "5396",
2187+
"pctconffluicubedshosprep": "4306",
2188+
"pctconfrsvicubedshosprep": "0",
2189+
"numinptbedsperchosprep": "0.9492",
2190+
"numinptbedsoccperchosprep": "0.9492",
2191+
"numicubedsperchosprep": "0.9492",
2192+
"numicubedsoccperchosprep": "0.9492",
2193+
"totalconfc19hosppatsperc": "0.9492",
2194+
"totalconffluhosppatsperc": "0.7594",
2195+
"totalconfrsvhosppatsperc": "0.0",
2196+
"totalconfc19icupatsperchosprep": "0.9492",
2197+
"totalconffluicupatsperchosprep": "0.7574",
2198+
"totalconfrsvicupatsperchosprep": "0.0",
2199+
"totalconfc19newadmpedper": "0.9284",
2200+
"totalconfc19newadmadultp": "0.9488",
2201+
"totalconfc19newadmperchosprep": "94.88",
2202+
"totalconfflunewadmpedper": "0.0",
2203+
"totalconfflunewadmadultp": "0.0",
2204+
"totalconfflunewadmperchosprep": "75.76",
2205+
"totalconfrsvnewadmpedper": "0.0",
2206+
"totalconfrsvnewadmadultp": "0.0",
2207+
"totalconfrsvnewadmperchosprep": "0.0",
2208+
"pctinptbedsoccperchosprep": "0.9492",
2209+
"pcticubedsoccperchosprep": "0.9492",
2210+
"pctconfc19inptbedsperchosprep": "0.9492",
2211+
"pctconffluinptbedsperchosprep": "0.7594",
2212+
"pctconfrsvinptbedsperchosprep": "0.0",
2213+
"pctconfc19icubedsperchosprep": "0.9492",
2214+
"pctconffluicubedsperchosprep": "0.7574",
2215+
"pctconfrsvicubedsperchosprep": "0.0",
2216+
"numinptbedsperchosprepabschg": "-0.07",
2217+
"numinptbedsoccperchospre": "-0.07",
2218+
"numicubedsperchosprepabschg": "-0.07",
2219+
"numicubedsoccperchosprepabschg": "-0.07",
2220+
"totalconfc19hosppatsperc_1": "-0.02",
2221+
"totalconffluhosppatsperc_1": "-1.23",
2222+
"totalconfrsvhosppatsperc_1": "0.0",
2223+
"totalconfc19icupatsperch": "-0.02",
2224+
"totalconffluicupatsperch": "-1.2",
2225+
"totalconfrsvicupatsperch": "0.0",
2226+
"totalconfc19newadmpedper_1": "-0.04",
2227+
"totalconfc19newadmadultp_1": "-0.04",
2228+
"totalconfc19newadmpercho": "-0.04",
2229+
"totalconfflunewadmpedper_1": "0.0",
2230+
"totalconfflunewadmadultp_1": "0.0",
2231+
"totalconfflunewadmpercho": "-1.27",
2232+
"totalconfrsvnewadmpedper_1": "0.0",
2233+
"totalconfrsvnewadmadultp_1": "0.0",
2234+
"totalconfrsvnewadmpercho": "0.0",
2235+
"pctinptbedsoccperchospre": "-0.0007",
2236+
"pcticubedsoccperchosprepabschg": "-0.0007",
2237+
"pctconfc19inptbedspercho": "-0.0002",
2238+
"pctconffluinptbedspercho": "-0.0123",
2239+
"pctconfrsvinptbedspercho": "0.0",
2240+
"pctconfc19icubedsperchos": "-0.0002",
2241+
"pctconffluicubedsperchos": "-0.012",
2242+
"pctconfrsvicubedsperchos": "0.0"
21252243
}
21262244
]

nhsn/tests/test_run.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ def test_output_files_exist(self, params, run_as_module):
4545
"geo_id", "val", "se", "sample_size",
4646
]
4747
assert (df.columns.values == expected_columns).all()
48+
if geo == "state":
49+
states = list(df["geo_id"].values)
50+
assert all(len(state) == 2 for state in states)
4851

4952
for file in Path(export_dir).glob("*.csv"):
5053
os.remove(file)

0 commit comments

Comments
 (0)