Downloading files from S3 recursively using boto in Python.

I have a bucket in S3 with a deep directory structure. I would like to be able to download all of the files at once. My files look like this:

    foo/bar/1
    .
    .
    foo/bar/100
    .
    .

Is there a way to download these files recursively from the S3 bucket using the boto library in Python?

Thanks in advance.

You can download all the files in a bucket like this (untested):

    import logging
    from boto.s3.connection import S3Connection

    conn = S3Connection('your-access-key', 'your-secret-key')
    bucket = conn.get_bucket('bucket')

    for key in bucket.list():
        try:
            # save each key under its key name in the current directory
            res = key.get_contents_to_filename(key.name)
        except Exception:
            logging.info(key.name + ": FAILED")

Keep in mind that folders in S3 are simply another way of writing the key name; only clients will display them as folders.
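To illustrate that point, here is a minimal sketch (the key name `foo/bar/hello.txt` is just a hypothetical example) that produces an apparent folder structure without creating any directory objects:

    # uploading under a key name containing slashes creates the
    # *appearance* of folders foo/ and foo/bar/, but the bucket only
    # stores one flat key, 'foo/bar/hello.txt'
    key = bucket.new_key('foo/bar/hello.txt')  # 'bucket' as obtained above
    key.set_contents_from_string('hello')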

    import os
    import boto

    LOCAL_PATH = 'tmp/'
    AWS_ACCESS_KEY_ID = 'YOUR_AWS_ACCESS_KEY_ID'
    AWS_SECRET_ACCESS_KEY = 'YOUR_AWS_SECRET_ACCESS_KEY'
    bucket_name = 'your_bucket_name'

    # connect to the bucket
    conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    bucket = conn.get_bucket(bucket_name)

    # go through the list of files
    bucket_list = bucket.list()
    for l in bucket_list:
        keyString = str(l.key)
        d = LOCAL_PATH + keyString
        # make sure the parent directory exists before downloading
        parent = os.path.dirname(d)
        if parent and not os.path.exists(parent):
            os.makedirs(parent)  # creates dirs recursively
        if not keyString.endswith('/'):  # skip "folder" placeholder keys
            l.get_contents_to_filename(d)
    #!/usr/bin/env python
    import os
    import errno
    import boto
    from boto.exception import S3ResponseError

    DOWNLOAD_LOCATION_PATH = os.path.expanduser("~") + "/s3-backup/"
    if not os.path.exists(DOWNLOAD_LOCATION_PATH):
        print("Making download directory")
        os.mkdir(DOWNLOAD_LOCATION_PATH)

    def backup_s3_folder():
        BUCKET_NAME = "your-bucket-name"
        AWS_ACCESS_KEY_ID = os.getenv("AWS_KEY_ID")          # set AWS_KEY_ID in your environment
        AWS_ACCESS_SECRET_KEY = os.getenv("AWS_ACCESS_KEY")  # set AWS_ACCESS_KEY in your environment
        conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_ACCESS_SECRET_KEY)
        bucket = conn.get_bucket(BUCKET_NAME)

        # go through the list of files
        bucket_list = bucket.list()
        for l in bucket_list:
            key_string = str(l.key)
            s3_path = DOWNLOAD_LOCATION_PATH + key_string
            try:
                print("Current File is", s3_path)
                l.get_contents_to_filename(s3_path)
            except (OSError, S3ResponseError):
                # the download failed, most likely because the key is a
                # "folder" placeholder; create it as a local directory
                if not os.path.exists(s3_path):
                    try:
                        os.makedirs(s3_path)
                    except OSError as exc:
                        # guard against race conditions
                        if exc.errno != errno.EEXIST:
                            raise

    if __name__ == '__main__':
        backup_s3_folder()

Just added a directory-creation part to @j0nes' comment:

    from boto.s3.connection import S3Connection
    import os

    conn = S3Connection('your-access-key', 'your-secret-key')
    bucket = conn.get_bucket('bucket')

    for key in bucket.list():
        print(key.name)
        if key.name.endswith('/'):
            # "folder" placeholder key: create the local directory
            if not os.path.exists('./' + key.name):
                os.makedirs('./' + key.name)
        else:
            res = key.get_contents_to_filename('./' + key.name)

This will download the files to the current directory and create directories when needed.
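One caveat: keys ending in `/` only exist if a client explicitly created such placeholder objects (the S3 console does this, for example). If your bucket has none, a variation that derives local directories from each key's path should still work; a minimal sketch, reusing the `bucket` object from above:

    import os

    for key in bucket.list():
        dest = './' + key.name
        parent = os.path.dirname(dest)
        # create any missing parent directories for this key
        if parent and not os.path.exists(parent):
            os.makedirs(parent)
        if not key.name.endswith('/'):
            key.get_contents_to_filename(dest)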

    import boto
    from boto.s3.key import Key

    keyId = 'YOUR_AWS_ACCESS_KEY_ID'
    sKeyId = 'YOUR_AWS_SECRET_ACCESS_KEY'
    bucketName = 'your_bucket_name'

    conn = boto.connect_s3(keyId, sKeyId)
    bucket = conn.get_bucket(bucketName)

    for key in bucket.list():
        print(">>>>>" + key.name)
        pathV = key.name.split('/')
        if pathV[0] == "data":
            if pathV[1] != "":
                srcFileName = key.name
                filename = key.name.split('/')[1]
                destFileName = "model/data/" + filename
                k = Key(bucket, srcFileName)
                k.get_contents_to_filename(destFileName)
        elif pathV[0] == "nlu_data":
            if pathV[1] != "":
                srcFileName = key.name
                filename = key.name.split('/')[1]
                destFileName = "model/nlu_data/" + filename
                k = Key(bucket, srcFileName)
                k.get_contents_to_filename(destFileName)