I'm currently trying to retrieve the attribute data for all of the buckets in an AWS account. The code below works (run from outside the environment each bucket lives in), but it takes a terribly long time. I'm using a handful of different retrieval methods because I couldn't find a single retrievable object containing all of the metadata I need. If anyone has ideas for speeding this up, I'd love the help!
<code>import boto3
import csv
import time
from botocore.exceptions import ClientError

s3 = boto3.resource('s3')
s3_client = boto3.client('s3')

def getTagContents(tagging, keyword):
    # Return the value of the tag whose Key matches keyword,
    # or "Unknown" if the bucket has no such tag.
    matches = [tag['Value'] for tag in tagging if tag['Key'] == keyword]
    return matches[0] if matches else "Unknown"

start_time = time.time()
with open("quicksight-test.csv", "w", newline='') as csv_file:
    fieldnames = ["Name", "CreationDate", "Versioned", "Region", "Product",
                  "ProductComponent", "Environment", "CustomerName",
                  "CustomerState", "StorageClass"]
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(fieldnames)
    for bucket in s3.buckets.limit(50):
        try:
            tagging = s3_client.get_bucket_tagging(Bucket=bucket.name)['TagSet']
        except ClientError:
            # Untagged buckets raise an error on get_bucket_tagging; skip them.
            continue
        # Sample a single object to infer the bucket's storage class.
        objects = list(s3.Bucket(bucket.name).objects.limit(1))
        sclass = objects[0].storage_class if objects else ""
        csv_writer.writerow([
            bucket.name,
            bucket.creation_date,
            bucket.Versioning().status,
            s3_client.get_bucket_location(Bucket=bucket.name)['LocationConstraint'],
            getTagContents(tagging, "Project"),
            getTagContents(tagging, "ProductComponent"),
            " ",  # Environment placeholder
            getTagContents(tagging, "CustomerName"),
            getTagContents(tagging, "CustomerState"),
            sclass,
        ])
end_time = time.time() - start_time
print(end_time)
</code>
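(A side note in case it matters for the Region column: as far as I can tell, get_bucket_location returns a LocationConstraint of None for buckets created in us-east-1, so those rows come out blank. I'm normalizing that with a small helper like this — the helper name and the fallback string are just my own choices for the CSV:)

<code>def get_bucket_region(bucket_name):
    # get_bucket_location reports LocationConstraint=None for buckets
    # created in us-east-1, so fall back to the literal region name.
    location = s3_client.get_bucket_location(Bucket=bucket_name)['LocationConstraint']
    return location or 'us-east-1'
</code>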
I have at least managed to pare down the number of objects I retrieve from each bucket, but it's still taking over a second per bucket.
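My current thinking is that the per-bucket cost is four sequential network round trips (tagging, versioning, location, and the one-object listing), so overlapping the buckets should help more than trimming any single call. Below is a sketch I'm considering with concurrent.futures — the worker count and pool size are guesses to tune, and I switched the workers to client-only calls since boto3 documents low-level clients as thread safe (resources are not guaranteed to be):

<code>import csv
import time
from concurrent.futures import ThreadPoolExecutor

import boto3
from botocore.config import Config
from botocore.exceptions import ClientError

# Clients are thread safe; widen the connection pool to match the worker count.
s3_client = boto3.client('s3', config=Config(max_pool_connections=16))

def getTagContents(tagging, keyword):
    matches = [tag['Value'] for tag in tagging if tag['Key'] == keyword]
    return matches[0] if matches else "Unknown"

def bucket_row(bucket):
    # Build one CSV row; all four per-bucket API calls run in a worker thread.
    name = bucket['Name']
    try:
        tagging = s3_client.get_bucket_tagging(Bucket=name)['TagSet']
    except ClientError:
        return None  # untagged bucket: skip, matching the original behavior
    versioning = s3_client.get_bucket_versioning(Bucket=name).get('Status')
    region = s3_client.get_bucket_location(Bucket=name)['LocationConstraint']
    contents = s3_client.list_objects_v2(Bucket=name, MaxKeys=1).get('Contents', [])
    sclass = contents[0]['StorageClass'] if contents else ""
    return [name, bucket['CreationDate'], versioning, region,
            getTagContents(tagging, "Project"),
            getTagContents(tagging, "ProductComponent"),
            " ",  # Environment placeholder
            getTagContents(tagging, "CustomerName"),
            getTagContents(tagging, "CustomerState"), sclass]

start_time = time.time()
buckets = s3_client.list_buckets()['Buckets'][:50]
with ThreadPoolExecutor(max_workers=16) as pool:  # worker count is a guess; tune it
    rows = pool.map(bucket_row, buckets)
with open("quicksight-test.csv", "w", newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["Name", "CreationDate", "Versioned", "Region", "Product",
                         "ProductComponent", "Environment", "CustomerName",
                         "CustomerState", "StorageClass"])
    csv_writer.writerows(row for row in rows if row is not None)
print(time.time() - start_time)
</code>

With something like this the wall-clock time should be closer to the latency of the slowest bucket rather than the sum over all buckets, though I'd welcome corrections if there's a better approach.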