@@ -472,6 +472,7 @@ def download_data(self, path, bucket, key_prefix="", extra_args=None):
472
472
473
473
# Initialize the variables used to loop through the contents of the S3 bucket.
474
474
keys = []
475
+ directories = []
475
476
next_token = ""
476
477
base_parameters = {"Bucket" : bucket , "Prefix" : key_prefix }
477
478
@@ -490,20 +491,26 @@ def download_data(self, path, bucket, key_prefix="", extra_args=None):
490
491
return []
491
492
# For each object, save its key or directory.
492
493
for s3_object in contents :
493
- key = s3_object .get ("Key" )
494
- keys .append (key )
494
+ key : str = s3_object .get ("Key" )
495
+ obj_size = s3_object .get ("Size" )
496
+ if key .endswith ("/" ) and int (obj_size ) == 0 :
497
+ directories .append (key )
498
+ else :
499
+ keys .append (key )
495
500
next_token = response .get ("NextContinuationToken" )
496
501
497
502
# For each object key, create the directory on the local machine if needed, and then
498
503
# download the file.
499
504
downloaded_paths = []
505
+ for dir_path in directories :
506
+ os .makedirs (os .path .join (path , dir_path ), exist_ok = True )
500
507
for key in keys :
501
508
tail_s3_uri_path = os .path .basename (key )
502
509
if not os .path .splitext (key_prefix )[1 ]:
503
510
tail_s3_uri_path = os .path .relpath (key , key_prefix )
504
511
destination_path = os .path .join (path , tail_s3_uri_path )
505
512
if not os .path .exists (os .path .dirname (destination_path )):
506
- os .makedirs (os .path .dirname (destination_path ))
513
+ os .makedirs (os .path .dirname (destination_path ), exist_ok = True )
507
514
s3 .download_file (
508
515
Bucket = bucket , Key = key , Filename = destination_path , ExtraArgs = extra_args
509
516
)
0 commit comments