Skip to content

Commit 0aad395

Browse files
authored
Merge pull request #785 from cmu-delphi/changehc_dropdate
changehc: fetch files based on filename not modification time
2 parents c4a5e9f + 754058b commit 0aad395

File tree

3 files changed

+32
-28
lines changed

3 files changed

+32
-28
lines changed

changehc/delphi_changehc/download_ftp_files.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Download files modified in the last 24 hours from the specified ftp server."""
1+
"""Download files from the specified ftp server."""
22

33
# standard
44
import datetime
@@ -16,21 +16,19 @@ def print_callback(filename, bytes_so_far, bytes_total):
1616
print(f'{filename} transfer: {rough_percent_transferred}%')
1717

1818

19-
def get_files_from_dir(sftp, out_path):
20-
"""Download files from sftp server that have been uploaded in last day.
19+
def get_files_from_dir(sftp, filedate, out_path):
20+
"""Download files from sftp server tagged with the specified day.
2121
2222
Args:
2323
sftp: SFTP Session from Paramiko client
24+
filedate: YYYYmmdd string for which the files are named
2425
out_path: Path to local directory into which to download the files
2526
"""
26-
current_time = datetime.datetime.now()
27-
2827
# go through files in receiving dir
2928
filepaths_to_download = {}
3029
for fileattr in sftp.listdir_attr():
31-
file_time = datetime.datetime.fromtimestamp(fileattr.st_mtime)
3230
filename = fileattr.filename
33-
if current_time - file_time < datetime.timedelta(days=1) and \
31+
if fileattr.filename.startswith(filedate) and \
3432
not path.exists(path.join(out_path, filename)):
3533
filepaths_to_download[filename] = path.join(out_path, filename)
3634

@@ -43,10 +41,11 @@ def get_files_from_dir(sftp, out_path):
4341
sftp.get(infile, outfile, callback=callback_for_filename)
4442

4543

46-
def download_covid(out_path, ftp_conn):
44+
def download_covid(filedate, out_path, ftp_conn):
4745
"""Download files necessary to create chng-covid signal from ftp server.
4846
4947
Args:
48+
filedate: YYYYmmdd string for which the files are named
5049
out_path: Path to local directory into which to download the files
5150
ftp_conn: Dict containing login credentials to ftp server
5251
"""
@@ -62,20 +61,21 @@ def download_covid(out_path, ftp_conn):
6261
sftp = client.open_sftp()
6362

6463
sftp.chdir('/dailycounts/All_Outpatients_By_County')
65-
get_files_from_dir(sftp, out_path)
64+
get_files_from_dir(sftp, filedate, out_path)
6665

6766
sftp.chdir('/dailycounts/Covid_Outpatients_By_County')
68-
get_files_from_dir(sftp, out_path)
67+
get_files_from_dir(sftp, filedate, out_path)
6968

7069
finally:
7170
if client:
7271
client.close()
7372

7473

75-
def download_cli(out_path, ftp_conn):
74+
def download_cli(filedate, out_path, ftp_conn):
7675
"""Download files necessary to create chng-cli signal from ftp server.
7776
7877
Args:
78+
filedate: YYYYmmdd string for which the files are named
7979
out_path: Path to local directory into which to download the files
8080
ftp_conn: Dict containing login credentials to ftp server
8181
"""
@@ -91,19 +91,19 @@ def download_cli(out_path, ftp_conn):
9191
sftp = client.open_sftp()
9292

9393
sftp.chdir('/dailycounts/All_Outpatients_By_County')
94-
get_files_from_dir(sftp, out_path)
94+
get_files_from_dir(sftp, filedate, out_path)
9595

9696
sftp.chdir('/dailycounts/Flu_Patient_Count_By_County')
97-
get_files_from_dir(sftp, out_path)
97+
get_files_from_dir(sftp, filedate, out_path)
9898

9999
sftp.chdir('/dailycounts/Mixed_Patient_Count_By_County')
100-
get_files_from_dir(sftp, out_path)
100+
get_files_from_dir(sftp, filedate, out_path)
101101

102102
sftp.chdir('/dailycounts/Flu_Like_Patient_Count_By_County')
103-
get_files_from_dir(sftp, out_path)
103+
get_files_from_dir(sftp, filedate, out_path)
104104

105105
sftp.chdir('/dailycounts/Covid_Like_Patient_Count_By_County')
106-
get_files_from_dir(sftp, out_path)
106+
get_files_from_dir(sftp, filedate, out_path)
107107

108108
finally:
109109
if client:

changehc/delphi_changehc/run.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@ def retrieve_files(params, filedate, logger):
2525
## download recent files from FTP server
2626
logger.info("downloading recent files through SFTP")
2727
if "covid" in params["types"]:
28-
download_covid(params["cache_dir"], params["ftp_conn"])
28+
download_covid(filedate, params["cache_dir"], params["ftp_conn"])
2929
if "cli" in params["types"]:
30-
download_cli(params["cache_dir"], params["ftp_conn"])
30+
download_cli(filedate, params["cache_dir"], params["ftp_conn"])
3131

3232
denom_file = "%s/%s_All_Outpatients_By_County.dat.gz" % (params["cache_dir"],filedate)
3333
covid_file = "%s/%s_Covid_Outpatients_By_County.dat.gz" % (params["cache_dir"],filedate)

changehc/tests/test_download_ftp_files.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,27 +37,31 @@ def __init__(self, time, name):
3737
def test_get_files(self, mock_path):
3838

3939
# When one new file is present, one file is downloaded
40-
one_new = self.MockSFTP([self.FileAttr(dt.timestamp(dt.now()-timedelta(minutes=1)),"foo")])
41-
get_files_from_dir(one_new, "")
40+
one_new = self.MockSFTP([
41+
self.FileAttr(dt.timestamp(dt.now()-timedelta(minutes=1)), "00001122_foo")
42+
])
43+
get_files_from_dir(one_new, "00001122", "")
4244
assert one_new.num_gets == 1
4345

4446
# When one new file and one old file are present, one file is downloaded
45-
one_new_one_old = self.MockSFTP([self.FileAttr(dt.timestamp(dt.now()-timedelta(minutes=1)),"foo"),
46-
self.FileAttr(dt.timestamp(dt.now()-timedelta(days=10)),"foo")])
47-
get_files_from_dir(one_new_one_old, "")
47+
one_new_one_old = self.MockSFTP([
48+
self.FileAttr(dt.timestamp(dt.now()-timedelta(minutes=1)), "00005566_foo"),
49+
self.FileAttr(dt.timestamp(dt.now()-timedelta(days=10)), "00001122_foo")
50+
])
51+
get_files_from_dir(one_new_one_old, "00005566", "")
4852
assert one_new_one_old.num_gets == 1
4953

5054
# When three new files are present, an AssertionError is raised
51-
new_file1 = self.FileAttr(dt.timestamp(dt.now()-timedelta(minutes=1)),"foo1")
52-
new_file2 = self.FileAttr(dt.timestamp(dt.now()-timedelta(minutes=1)),"foo2")
53-
new_file3 = self.FileAttr(dt.timestamp(dt.now()-timedelta(minutes=1)),"foo3")
55+
new_file1 = self.FileAttr(dt.timestamp(dt.now()-timedelta(minutes=1)), "00001122_foo1")
56+
new_file2 = self.FileAttr(dt.timestamp(dt.now()-timedelta(minutes=1)), "00001122_foo2")
57+
new_file3 = self.FileAttr(dt.timestamp(dt.now()-timedelta(minutes=1)), "00001122_foo3")
5458
three_new = self.MockSFTP([new_file1, new_file2, new_file3])
5559
with pytest.raises(AssertionError):
56-
get_files_from_dir(three_new,"")
60+
get_files_from_dir(three_new, "00001122", "")
5761

5862
# When the file already exists, no files are downloaded
5963
mock_path.exists.return_value = True
6064
one_exists = self.MockSFTP([new_file1])
61-
get_files_from_dir(one_new, "")
65+
get_files_from_dir(one_new, "00001122", "")
6266
assert one_exists.num_gets == 0
6367

0 commit comments

Comments
 (0)