4
4
from datetime import date , datetime
5
5
import pandas as pd
6
6
7
- from delphi_google_symptoms .pull import pull_gs_data , preprocess , get_missing_dates , format_dates_for_query , pull_gs_data_one_geolevel , get_all_dates
7
+ from delphi_google_symptoms .pull import (
8
+ pull_gs_data , preprocess , format_dates_for_query , pull_gs_data_one_geolevel , get_date_range )
8
9
from delphi_google_symptoms .constants import METRICS , COMBINED_METRIC
9
10
10
11
good_input = {
23
24
new_keep_cols = ["geo_id" , "timestamp" ] + METRICS + [COMBINED_METRIC ]
24
25
25
26
26
- # List of faux already-exported CSVs.
27
- exported_files = [
28
- "20201231_whatever.csv" ,
29
- "20210101_whatever.csv" ,
30
- "20210102_whatever.csv" ,
31
- "20210103_whatever.csv" ,
32
- "20210105_not_even_a_csv.txt" ,
33
- "not_the_right_format.csv"
27
+ # Set up fake list of dates to fetch.
28
+ dates = [
29
+ "20200726" ,
30
+ "20200811"
34
31
]
32
+ date_list = [datetime .strptime (date , "%Y%m%d" ).date () for date in dates ]
35
33
36
34
37
35
class TestPullGoogleSymptoms :
38
36
@freeze_time ("2021-01-05" )
39
37
@mock .patch ("pandas_gbq.read_gbq" )
40
38
@mock .patch ("delphi_google_symptoms.pull.initialize_credentials" )
41
- @mock .patch ("delphi_google_symptoms.pull.listdir" )
42
- @mock .patch ("delphi_google_symptoms.pull.isfile" )
43
- def test_good_file (self , mock_is_file , mock_get_dates , mock_credentials , mock_read_gbq ):
39
+ def test_good_file (self , mock_credentials , mock_read_gbq ):
44
40
# Set up fake data.
45
41
state_data = pd .read_csv (
46
42
good_input ["state" ], parse_dates = ["date" ])[keep_cols ]
@@ -50,11 +46,9 @@ def test_good_file(self, mock_is_file, mock_get_dates, mock_credentials, mock_re
50
46
# Mocks
51
47
mock_read_gbq .side_effect = [state_data , county_data ]
52
48
mock_credentials .return_value = None
53
- mock_get_dates .return_value = exported_files
54
- mock_is_file = True
55
49
56
50
dfs = pull_gs_data (
57
- "" , "receiving" , datetime .strptime ("20201230" , "%Y%m%d" ))
51
+ "" , datetime .strptime ("20201230" , "%Y%m%d" ), 0 )
58
52
59
53
for level in ["county" , "state" ]:
60
54
df = dfs [level ]
@@ -88,78 +82,37 @@ def test_invalid_fips(self):
88
82
89
83
class TestPullHelperFuncs :
90
84
@freeze_time ("2021-01-05" )
91
- @ mock . patch ( "delphi_google_symptoms.pull.listdir" )
92
- @ mock . patch ( "delphi_google_symptoms.pull.isfile" )
93
- def test_get_missing_dates ( self , mock_is_file , mock_get_dates ):
94
- mock_get_dates . return_value = exported_files
95
- mock_is_file = True
85
+ def test_get_date_range_recent_export_start_date ( self ):
86
+ output = get_date_range (
87
+ datetime . strptime ( "20201230" , "%Y%m%d" ),
88
+ 14
89
+ )
96
90
97
- output = get_missing_dates (
98
- "receiving" , datetime .strptime ("20201230" , "%Y%m%d" ))
99
-
100
- expected = [date (2020 , 12 , 30 ), date (2021 , 1 , 4 ), date (2021 , 1 , 5 )]
101
- assert set (output ) == set (expected )
102
-
103
- @freeze_time ("2021-01-05" )
104
- @mock .patch ("delphi_google_symptoms.pull.listdir" )
105
- @mock .patch ("delphi_google_symptoms.pull.isfile" )
106
- def test_get_all_dates_recent_export_start_date (self , mock_is_file , mock_get_dates ):
107
- mock_get_dates .return_value = exported_files
108
- mock_is_file = True
109
-
110
- output = get_all_dates (
111
- "receiving" , datetime .strptime ("20201230" , "%Y%m%d" ))
112
-
113
- expected = [date (2020 , 12 , 30 ),
114
- date (2020 , 12 , 31 ),
115
- date (2021 , 1 , 1 ),
116
- date (2021 , 1 , 2 ),
117
- date (2021 , 1 , 3 ),
118
- date (2021 , 1 , 4 ),
91
+ expected = [date (2020 , 12 , 24 ),
119
92
date (2021 , 1 , 5 )]
120
93
assert set (output ) == set (expected )
121
94
122
95
@freeze_time ("2021-01-05" )
123
- @mock .patch ("delphi_google_symptoms.pull.get_missing_dates" )
124
- @mock .patch ("delphi_google_symptoms.pull.listdir" )
125
- @mock .patch ("delphi_google_symptoms.pull.isfile" )
126
- def test_get_all_dates (self , mock_is_file , mock_get_dates , mock_missing_dates ):
127
- mock_get_dates .return_value = exported_files
128
- mock_is_file = True
129
- mock_missing_dates .return_value = [
130
- date (2020 , 12 , 30 ), date (2021 , 1 , 4 ), date (2021 , 1 , 5 )]
131
-
132
- output = get_all_dates (
133
- "receiving" , datetime .strptime ("20200201" , "%Y%m%d" ))
96
+ def test_get_date_range (self ):
97
+ output = get_date_range (
98
+ datetime .strptime ("20200201" , "%Y%m%d" ),
99
+ 14
100
+ )
134
101
135
- expected = [date (2020 , 12 , 24 ),
136
- date (2020 , 12 , 25 ),
137
- date (2020 , 12 , 26 ),
138
- date (2020 , 12 , 27 ),
139
- date (2020 , 12 , 28 ),
140
- date (2020 , 12 , 29 ),
141
- date (2020 , 12 , 30 ),
142
- date (2020 , 12 , 31 ),
143
- date (2021 , 1 , 1 ),
144
- date (2021 , 1 , 2 ),
145
- date (2021 , 1 , 3 ),
146
- date (2021 , 1 , 4 ),
147
- date (2021 , 1 , 5 )]
102
+ expected = [date (2020 , 12 , 16 ), date (2021 , 1 , 5 )]
148
103
assert set (output ) == set (expected )
149
104
150
105
def test_format_dates_for_query (self ):
151
- date_list = [date (2016 , 12 , 30 ), date (2020 , 12 , 30 ),
152
- date (2021 , 1 , 4 ), date (2021 , 1 , 5 )]
106
+ date_list = [date (2016 , 12 , 30 ), date (2021 , 1 , 5 )]
153
107
output = format_dates_for_query (date_list )
154
-
155
- expected = 'timestamp("2020-12-30"), timestamp("2021-01-04"), timestamp("2021-01-05")'
108
+ expected = ["2016-12-30" , "2021-01-05" ]
156
109
assert output == expected
157
110
158
111
@mock .patch ("pandas_gbq.read_gbq" )
159
112
def test_pull_one_gs_no_dates (self , mock_read_gbq ):
160
113
mock_read_gbq .return_value = pd .DataFrame ()
161
114
162
- output = pull_gs_data_one_geolevel ("state" , {} )
115
+ output = pull_gs_data_one_geolevel ("state" , [ "" , "" ] )
163
116
expected = pd .DataFrame (columns = new_keep_cols )
164
117
assert output .equals (expected )
165
118
0 commit comments