@@ -460,6 +460,7 @@ def download_data(self, path, bucket, key_prefix="", extra_args=None):
460
460
461
461
# Initialize the variables used to loop through the contents of the S3 bucket.
462
462
keys = []
463
+ directories = []
463
464
next_token = ""
464
465
base_parameters = {"Bucket" : bucket , "Prefix" : key_prefix }
465
466
@@ -478,20 +479,26 @@ def download_data(self, path, bucket, key_prefix="", extra_args=None):
478
479
return []
479
480
# For each object, save its key or directory.
480
481
for s3_object in contents :
481
- key = s3_object .get ("Key" )
482
- keys .append (key )
482
+ key : str = s3_object .get ("Key" )
483
+ obj_size = s3_object .get ("Size" )
484
+ if key .endswith ("/" ) and int (obj_size ) == 0 :
485
+ directories .append (key )
486
+ else :
487
+ keys .append (key )
483
488
next_token = response .get ("NextContinuationToken" )
484
489
485
490
# For each object key, create the directory on the local machine if needed, and then
486
491
# download the file.
487
492
downloaded_paths = []
493
+ for dir_path in directories :
494
+ os .makedirs (os .path .join (path , dir_path ), exist_ok = True )
488
495
for key in keys :
489
496
tail_s3_uri_path = os .path .basename (key )
490
497
if not os .path .splitext (key_prefix )[1 ]:
491
498
tail_s3_uri_path = os .path .relpath (key , key_prefix )
492
499
destination_path = os .path .join (path , tail_s3_uri_path )
493
500
if not os .path .exists (os .path .dirname (destination_path )):
494
- os .makedirs (os .path .dirname (destination_path ))
501
+ os .makedirs (os .path .dirname (destination_path ), exist_ok = True )
495
502
s3 .download_file (
496
503
Bucket = bucket , Key = key , Filename = destination_path , ExtraArgs = extra_args
497
504
)
0 commit comments