@@ -138,9 +138,8 @@ def ported_signal(
138
138
)
139
139
140
140
if response ["result" ] != 1 :
141
- print (f"check { data_source } { signal } " )
142
141
# Something failed in the API and we did not get real metadata
143
- # raise RuntimeError("Error when fetching signal data from the API", response["message"])
142
+ raise RuntimeError ("Error when fetching signal data from the API" , response ["message" ])
144
143
145
144
api_df = pd .DataFrame .from_dict (response ["epidata" ])
146
145
if not api_df .empty :
@@ -293,30 +292,36 @@ def check_signal():
293
292
for date_range , data_source_signal_list in signal_timeframe_dict .items ():
294
293
for data_source , signal in data_source_signal_list :
295
294
time_type = date_range [2 ]
296
- expected_df = covidcast .signal (data_source , signal , start_day = date_range [0 ], end_day = date_range [1 ],
297
- geo_type = "state" , time_type = time_type )
298
- if expected_df is None :
299
- raise RuntimeError ("Data should exists" )
295
+ filename = f"{ CURRENT_DIR } /covidcast_result/{ data_source } _{ signal } .parquet"
296
+ if not Path (filename ).is_file ():
297
+ # every signal except google-symptom has geo type of state
298
+ geo_type = "state"
299
+ if data_source == "google-symptom" :
300
+ geo_type = "county"
300
301
301
- else :
302
- signal_df_dict [(data_source , signal , time_type )] = expected_df
302
+ expected_df = covidcast .signal (data_source , signal , start_day = date_range [0 ], end_day = date_range [1 ],
303
+ geo_type = geo_type , time_type = time_type )
304
+ if expected_df is None :
305
+ raise RuntimeError ("Data should exist" )
306
+
307
+ expected_df .to_parquet (filename )
308
+ signal_df_dict [(data_source , signal , time_type )] = filename
303
309
304
- time .sleep (500 )# TODO find a elegant way of seperating the logs for the calls from covidcast vs epidata
305
- print ("-" * 90 )
306
310
for date_range , data_source_signal_list in signal_timeframe_dict .items ():
307
311
for data_source , signal in data_source_signal_list :
308
- expected_df = signal_df_dict .get ((data_source , signal , date_range [2 ]))
312
+ expected_filename = signal_df_dict .get ((data_source , signal , date_range [2 ]))
313
+ expected_df = pd .read_parquet (expected_filename )
309
314
315
+ # every signal except google-symptom has geo type of state
316
+ geo_type = "state"
317
+ if data_source == "google-symptom" :
318
+ geo_type = "county"
310
319
df = ported_signal (data_source , signal , start_day = date_range [0 ], end_day = date_range [1 ],
311
320
time_type = date_range [2 ],
312
- geo_type = "state" )
313
- if not expected_df .empty :
314
- check = df .merge (expected_df , indicator = True )
315
- assert (check ["_merge" ] == "both" ).all ()
316
- else :
317
- assert df .empty
318
-
321
+ geo_type = geo_type )
322
+ check = df .merge (expected_df , indicator = True )
323
+ assert (check ["_merge" ] == "both" ).all ()
319
324
320
325
if __name__ == "__main__" :
321
- # check_metadata()
326
+ check_metadata ()
322
327
check_signal ()
0 commit comments