9
9
from datetime import date , timedelta , datetime
10
10
from itertools import product
11
11
import re
12
- import sys
13
12
14
13
import covidcast
15
14
import pandas as pd
16
15
17
- from delphi_utils import read_params , create_export_csv
18
- from .constants import *
19
- from .handle_wip_signal import *
16
+ from delphi_utils import read_params
17
+ from .constants import METRICS , SMOOTH_TYPES , SENSORS , GEO_RESOLUTIONS
18
+ from .handle_wip_signal import add_prefix
20
19
21
20
22
def check_none_data_frame(data_frame, label, date_range):
    """Log and return True when a data frame is None.

    Used to detect a completely unavailable source signal so the caller
    can decide whether to fall back or return an empty result.
    """
    # Anything other than None (including an empty frame) counts as available.
    if data_frame is not None:
        return False
    print(f"{label} completely unavailable in range {date_range}")
    return True
27
27
28
def maybe_append(df1, df2):
    """
    If both data frames are available, concatenate them and return. Otherwise,
    return whichever frame is not None (or None when both are).
    """
    if df1 is None:
        return df2
    if df2 is None:
        return df1
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat with default arguments is the drop-in equivalent (row-wise,
    # original indices preserved).
    return pd.concat([df1, df2])
39
# Maps covidcast API response column names (keys) to the column names used
# in the exported CSV schema (values); applied via DataFrame.rename below.
COLUMN_MAPPING = {"time_value": "timestamp",
                  "geo_value": "geo_id",
                  "value": "val",
                  "stderr": "se",
                  "sample_size": "sample_size"}
44
def combine_usafacts_and_jhu(signal, geo, date_range, fetcher=covidcast.signal):
    """
    Add rows for PR from JHU signals to USA-FACTS signals

    Fetches both sources via `fetcher` (injectable for testing), merges the
    Puerto Rico rows from JHU into the USA-FACTS frame for state/county
    geographies, and renames columns to the export schema. Returns an empty
    frame with the export columns when the data is completely unavailable.
    """
    def _restrict(frame, geo_value):
        # Keep only the rows for one geo_value; pass None through untouched.
        if frame is None:
            return None
        return frame[frame["geo_value"] == geo_value]

    print("Fetching usa-facts...")
    usafacts_df = fetcher("usa-facts", signal, date_range[0], date_range[1], geo)
    print("Fetching jhu-csse...")
    jhu_df = fetcher("jhu-csse", signal, date_range[0], date_range[1], geo)

    # Guard clauses mirror the original short-circuit logic: the JHU check
    # (and its log line) only runs when USA-FACTS is missing AND the geo is
    # one where JHU could still contribute (state/county PR rows).
    if check_none_data_frame(usafacts_df, "USA-FACTS", date_range):
        if geo not in ('state', 'county'):
            return pd.DataFrame({}, columns=COLUMN_MAPPING.values())
        if check_none_data_frame(jhu_df, "JHU", date_range):
            return pd.DataFrame({}, columns=COLUMN_MAPPING.values())

    if geo == 'state':
        # State level: append PR from JHU.
        combined_df = maybe_append(usafacts_df, _restrict(jhu_df, 'pr'))
    elif geo == 'county':
        # County level: append the PR county FIPS from JHU.
        combined_df = maybe_append(usafacts_df, _restrict(jhu_df, '72000'))
    else:
        # For MSA and HRR level, they are the same.
        combined_df = usafacts_df

    return (combined_df
            .drop(["direction"], axis=1)
            .rename(COLUMN_MAPPING, axis=1))
54
76
@@ -83,15 +105,12 @@ def sensor_signal(metric, sensor, smoother):
83
105
sensor_name = "_" .join ([smoother , sensor ])
84
106
else :
85
107
sensor_name = sensor
86
- signal = "_" .join ([metric , sensor_name ])
87
- return sensor_name , signal
88
-
89
- def run_module ():
90
- """Produce a combined cases and deaths signal using data from JHU and USA Facts"""
91
- variants = [tuple ((metric , geo_res )+ sensor_signal (metric , sensor , smoother ))
92
- for (metric , geo_res , sensor , smoother ) in
93
- product (METRICS , GEO_RESOLUTIONS , SENSORS , SMOOTH_TYPES )]
108
+ return sensor_name , "_" .join ([metric , sensor_name ])
94
109
110
+ def configure (variants ):
111
+ """
112
+ Validate params file and set date range.
113
+ """
95
114
params = read_params ()
96
115
params ['export_start_date' ] = date (* params ['export_start_date' ])
97
116
yesterday = date .today () - timedelta (days = 1 )
@@ -112,30 +131,36 @@ def run_module():
112
131
# create combined files for all of the historical reports
113
132
params ['date_range' ] = [params ['export_start_date' ], yesterday ]
114
133
else :
115
- pattern = re .compile (r'^\d{8}-\d{8}$' )
116
- match_res = re .findall (pattern , params ['date_range' ])
134
+ match_res = re .findall (re .compile (r'^\d{8}-\d{8}$' ), params ['date_range' ])
117
135
if len (match_res ) == 0 :
118
136
raise ValueError (
119
137
"Invalid date_range parameter. Please choose from (new, all, yyyymmdd-yyyymmdd)." )
120
138
try :
121
139
date1 = datetime .strptime (params ['date_range' ][:8 ], '%Y%m%d' ).date ()
122
- except ValueError :
123
- raise ValueError ("Invalid date_range parameter. Please check the first date." )
140
+ except ValueError as error :
141
+ raise ValueError (
142
+ "Invalid date_range parameter. Please check the first date." ) from error
124
143
try :
125
144
date2 = datetime .strptime (params ['date_range' ][- 8 :], '%Y%m%d' ).date ()
126
- except ValueError :
127
- raise ValueError ("Invalid date_range parameter. Please check the second date." )
145
+ except ValueError as error :
146
+ raise ValueError (
147
+ "Invalid date_range parameter. Please check the second date." ) from error
128
148
129
149
#The the valid start date
130
150
if date1 < params ['export_start_date' ]:
131
151
date1 = params ['export_start_date' ]
132
152
params ['date_range' ] = [date1 , date2 ]
153
+ return params
133
154
134
- for metric , geo_res , sensor_name , signal in variants :
135
-
136
- df = combine_usafacts_and_jhu (signal , geo_res , extend_raw_date_range (params , sensor_name ))
137
155
138
- df = df .copy ()
156
+ def run_module ():
157
+ """Produce a combined cases and deaths signal using data from JHU and USA Facts"""
158
+ variants = [tuple ((metric , geo_res )+ sensor_signal (metric , sensor , smoother ))
159
+ for (metric , geo_res , sensor , smoother ) in
160
+ product (METRICS , GEO_RESOLUTIONS , SENSORS , SMOOTH_TYPES )]
161
+ params = configure (variants )
162
+ for metric , geo_res , sensor_name , signal in variants :
163
+ df = combine_usafacts_and_jhu (signal , geo_res , extend_raw_date_range (params , sensor_name )) # pylint: disable=invalid-name
139
164
df ["timestamp" ] = pd .to_datetime (df ["timestamp" ])
140
165
start_date = pd .to_datetime (params ['export_start_date' ])
141
166
export_dir = params ["export_dir" ]
@@ -145,8 +170,7 @@ def run_module():
145
170
146
171
signal_name = add_prefix ([signal ], wip_signal = params ["wip_signal" ], prefix = "wip_" )
147
172
for date_ in dates :
148
- export_fn = f'{ date_ .strftime ("%Y%m%d" )} _{ geo_res } _' f" { signal_name [0 ]} .csv"
173
+ export_fn = f'{ date_ .strftime ("%Y%m%d" )} _{ geo_res } _{ signal_name [0 ]} .csv'
149
174
df [df ["timestamp" ] == date_ ][["geo_id" , "val" , "se" , "sample_size" , ]].to_csv (
150
175
f"{ export_dir } /{ export_fn } " , index = False , na_rep = "NA"
151
176
)
152
-
0 commit comments