-
Notifications
You must be signed in to change notification settings - Fork 16
CHC SFTP Downloads #352
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CHC SFTP Downloads #352
Changes from 3 commits
17bc7ab
e4343fc
638c578
571d51d
f115859
6d789d8
b310fd9
b27f6da
c8fe35f
7248fb4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
""" | ||
Downloads files modified in the last 24 hours from the specified SFTP server.""" | ||
|
||
# standard | ||
import datetime | ||
import functools | ||
import sys | ||
from os import path | ||
|
||
# third party | ||
import paramiko | ||
|
||
class AllowAnythingPolicy(paramiko.MissingHostKeyPolicy):
    """Missing-host-key policy whose handler does nothing for any key."""

    def missing_host_key(self, client, hostname, key):
        """Handle an unknown host key by taking no action.

        Nothing is raised, logged or stored; the method simply returns
        ``None`` whatever ``client``, ``hostname`` and ``key`` are.
        NOTE(review): under paramiko this presumably lets the connection
        proceed without host-key verification -- confirm this is intended.
        """
        return None
|
||
|
||
def print_callback(filename, bytes_so_far, bytes_total):
    """Print transfer progress when it lands on a multiple of 25 percent.

    Args:
        filename: Name shown in the progress message.
        bytes_so_far: Number of bytes transferred so far.
        bytes_total: Total number of bytes to transfer.

    The percentage is truncated to an integer, and a line is printed only
    when that integer is divisible by 25 (0, 25, 50, 75, 100).
    """
    if bytes_total:
        rough_percent_transferred = int(100 * (bytes_so_far / bytes_total))
    else:
        # A zero-byte transfer has nothing left to send; report it as done
        # instead of raising ZeroDivisionError.
        rough_percent_transferred = 100
    if (rough_percent_transferred % 25) == 0:
        print(f'{filename} transfer: {rough_percent_transferred}%')
|
||
|
||
def get_files_from_dir(sftp, out_path):
    """Download recently modified files from the SFTP client's current directory.

    Lists the directory ``sftp`` is currently in, keeps the entries whose
    modification time is at most 24 hours before now, and fetches every one
    of them that does not already exist in ``out_path``.

    Args:
        sftp: SFTP client positioned in the directory to scan. It must
            provide ``listdir_attr()`` and ``get(remote, local, callback=)``.
        out_path: Local directory the downloaded files are written into.

    Raises:
        AssertionError: If more than two files were modified in the last
            24 hours.
    """
    import os  # local import: only ``os.path`` is imported at module level

    current_time = datetime.datetime.now()
    max_age = datetime.timedelta(days=1)

    # go through files in receiving dir, keeping the recently modified ones
    files_to_download = [
        fileattr.filename
        for fileattr in sftp.listdir_attr()
        if current_time - datetime.datetime.fromtimestamp(fileattr.st_mtime)
        <= max_age
    ]

    # skip anything that is already present locally
    filepaths_to_download = {}
    for file in files_to_download:
        full_path = path.join(out_path, file)
        if path.exists(full_path):
            print(f"{file} exists, skipping")
        else:
            filepaths_to_download[file] = full_path

    # make sure we don't download more than 2 files per day; raised
    # explicitly (rather than via ``assert``) so the check is not stripped
    # when Python runs with -O
    if len(files_to_download) > 2:
        raise AssertionError("more files dropped than expected")

    # download!
    for infile, outfile in filepaths_to_download.items():
        callback_for_filename = functools.partial(print_callback, infile)
        completed = False
        try:
            sftp.get(infile, outfile, callback=callback_for_filename)
            completed = True
        finally:
            # A failed transfer can leave a truncated local file behind; the
            # "exists, skipping" check above would then ignore it on every
            # later run, so remove it before propagating the error.
            if not completed and path.exists(outfile):
                os.remove(outfile)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. callback is cool, didn't know of this feature in paramiko |
||
|
||
|
||
def download(out_path, ftp_conn):
    """Connect over SSH/SFTP and download the recent daily-count files.

    Opens a password-authenticated SSH connection, then runs
    ``get_files_from_dir`` in each of the two ``/dailycounts`` directories.
    The client is closed even when connecting or downloading fails.

    Args:
        out_path: Local directory the downloaded files are written into.
        ftp_conn: Mapping with the keys ``"host"``, ``"user"``, ``"pass"``
            and ``"port"``. The ``"pass"`` value is stored rotated: its
            first character is moved to the end to form the real password.
    """
    remote_dirs = (
        '/dailycounts/All_Outpatients_By_County',
        '/dailycounts/Covid_Outpatients_By_County',
    )

    # open client
    client = paramiko.SSHClient()
    # NOTE(review): AllowAnythingPolicy does nothing for unknown host keys,
    # so the server's identity is not verified -- confirm this is acceptable.
    client.set_missing_host_key_policy(AllowAnythingPolicy())

    try:
        client.connect(ftp_conn["host"], username=ftp_conn["user"],
                       # undo the one-character rotation of the stored value
                       password=ftp_conn["pass"][1:] + ftp_conn["pass"][0],
                       port=ftp_conn["port"],
                       allow_agent=False, look_for_keys=False)
        sftp = client.open_sftp()

        for remote_dir in remote_dirs:
            sftp.chdir(remote_dir)
            get_files_from_dir(sftp, out_path)
    finally:
        # always release the connection, including on connect/transfer errors
        client.close()
rumackaaron marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
from delphi_utils import read_params | ||
|
||
# first party | ||
from .download_ftp_files import download | ||
from .update_sensor import CHCSensorUpdator | ||
|
||
|
||
|
@@ -25,6 +26,10 @@ def run_module(): | |
|
||
logging.basicConfig(level=logging.DEBUG) | ||
|
||
## download recent files from FTP server | ||
logging.info("downloading recent files through SFTP") | ||
download(params["cache_dir"], params["ftp_conn"]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this be mocked out for testing? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 |
||
|
||
## get end date from input file | ||
# the filenames are expected to be in the format: | ||
# Denominator: "YYYYMMDD_All_Outpatients_By_County.dat.gz" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,7 +11,8 @@ | |
"delphi-utils", | ||
"covidcast", | ||
"boto3", | ||
"moto" | ||
"moto", | ||
"paramiko" | ||
] | ||
|
||
setup( | ||
|
Uh oh!
There was an error while loading. Please reload this page.