4
4
import tempfile
5
5
import os
6
6
7
- from delphi_hhs .run import _date_to_int , int_date_to_previous_day_datetime , generate_date_ranges , \
7
+ from delphi_hhs .run import _date_to_int , add_nancodes , int_date_to_previous_day_datetime , generate_date_ranges , \
8
8
make_signal , make_geo , run_module , pop_proportion
9
9
from delphi_hhs .constants import SMOOTHERS , GEOS , SIGNALS , \
10
10
CONFIRMED , SUM_CONF_SUSP , CONFIRMED_FLU , CONFIRMED_PROP , SUM_CONF_SUSP_PROP , CONFIRMED_FLU_PROP
11
- from delphi_utils . geomap import GeoMapper
11
+ from delphi_utils import GeoMapper , Nans
12
12
from freezegun import freeze_time
13
13
import numpy as np
14
14
import pandas as pd
@@ -85,15 +85,15 @@ def test_make_signal():
85
85
})
86
86
pd .testing .assert_frame_equal (expected_flu , make_signal (data , CONFIRMED_FLU ))
87
87
pd .testing .assert_frame_equal (expected_flu , make_signal (data , CONFIRMED_FLU_PROP ))
88
-
88
+
89
89
with pytest .raises (Exception ):
90
90
make_signal (data , "zig" )
91
91
92
92
def test_pop_proportion ():
93
93
geo_mapper = GeoMapper ()
94
94
state_pop = geo_mapper .get_crosswalk ("state_code" , "pop" )
95
95
96
- test_df = pd .DataFrame ({
96
+ test_df = pd .DataFrame ({
97
97
'state' : ['PA' ],
98
98
'state_code' : [42 ],
99
99
'timestamp' : [datetime (year = 2020 , month = 1 , day = 1 )],
@@ -109,7 +109,7 @@ def test_pop_proportion():
109
109
'val' : [15 / pa_pop * 100000 ],})
110
110
)
111
111
112
- test_df = pd .DataFrame ({
112
+ test_df = pd .DataFrame ({
113
113
'state' : ['WV' ],
114
114
'state_code' : [54 ],
115
115
'timestamp' : [datetime (year = 2020 , month = 1 , day = 1 )],
@@ -137,30 +137,23 @@ def test_make_geo():
137
137
'val' : [1. , 2. , 4. ],
138
138
})
139
139
140
- template = {
141
- 'se' : np .nan ,
142
- 'sample_size' : np .nan ,
143
- }
144
140
expecteds = {
145
141
"state" : pd .DataFrame (
146
- dict (template ,
147
- geo_id = data .state ,
142
+ dict (geo_id = data .state ,
148
143
timestamp = data .timestamp ,
149
144
val = data .val )),
150
145
"hhs" : pd .DataFrame (
151
- dict (template ,
152
- geo_id = ['3' , '5' ],
146
+ dict (geo_id = ['3' , '5' ],
153
147
timestamp = [test_timestamp ] * 2 ,
154
148
val = [3. , 4. ])),
155
149
"nation" : pd .DataFrame (
156
- dict (template ,
157
- geo_id = ['us' ],
150
+ dict (geo_id = ['us' ],
158
151
timestamp = [test_timestamp ],
159
152
val = [7. ]))
160
153
}
161
154
for geo , expected in expecteds .items ():
162
155
result = make_geo (data , geo , geo_mapper )
163
- for series in ["geo_id" , "timestamp" , "val" , "se" , "sample_size" ]:
156
+ for series in ["geo_id" , "timestamp" , "val" ]:
164
157
pd .testing .assert_series_equal (expected [series ], result [series ], obj = f"{ geo } :{ series } " )
165
158
166
159
@@ -207,3 +200,25 @@ def test_ignore_last_range_no_results(mock_covid_hosp, mock_export):
207
200
}
208
201
}
209
202
assert not run_module (params ) # function should not raise value error and has no return value
203
+
204
def test_add_nancode():
    """Check that ``add_nancodes`` yields the expected missingness columns.

    The input frame has three rows whose ``se`` and ``sample_size`` are all
    NaN and whose ``val`` is NaN only in the last row.  The expected frame
    carries the same columns plus:

    * ``missing_val``: ``Nans.NOT_MISSING`` for the two rows with a value,
      ``Nans.UNKNOWN`` for the row whose ``val`` is NaN;
    * ``missing_se`` and ``missing_sample_size``: ``Nans.NOT_APPLICABLE``
      for every row.
    """
    n_rows = 3

    # Columns shared by the input frame and the expected output frame.
    base_columns = {
        'state': ['PA', 'WV', 'OH'],
        'state_code': [42, 54, 39],
        'timestamp': [pd.to_datetime("20200601")] * n_rows,
        'val': [1, 2, np.nan],
        'se': [np.nan] * n_rows,
        'sample_size': [np.nan] * n_rows,
    }

    # Missingness columns expected in the result, in this order.
    nancode_columns = {
        'missing_val': [Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.UNKNOWN],
        'missing_se': [Nans.NOT_APPLICABLE] * n_rows,
        'missing_sample_size': [Nans.NOT_APPLICABLE] * n_rows,
    }

    data = pd.DataFrame(base_columns)
    # Built independently of `data`, so the comparison does not depend on
    # whether add_nancodes modifies its argument.
    expected = pd.DataFrame({**base_columns, **nancode_columns})

    actual = add_nancodes(data)
    pd.testing.assert_frame_equal(expected, actual)
0 commit comments