I’m trying to read a very large zip file from one S3 bucket and extract its contents into another S3 bucket, using the code below as a Lambda function:
import json
import boto3
from io import BytesIO
import zipfile
def lambda_handler(event, context):
    s3_resource = boto3.resource('s3')
    source_bucket = 'bucket1'
    target_bucket = 'bucket2'
    my_bucket = s3_resource.Bucket(source_bucket)
    for file in my_bucket.objects.all():
        if str(file.key).endswith('.zip'):
            zip_obj = s3_resource.Object(bucket_name=source_bucket, key=file.key)
            # This pulls the entire zip into memory, which is where the MemoryError happens
            buffer = BytesIO(zip_obj.get()["Body"].read())
            z = zipfile.ZipFile(buffer)
            for filename in z.namelist():
                file_info = z.getinfo(filename)
                try:
                    # Upload each extracted member to the target bucket
                    response = s3_resource.meta.client.upload_fileobj(
                        z.open(filename),
                        Bucket=target_bucket,
                        Key=f'{filename}'
                    )
                except Exception as e:
                    print(e)
        else:
            print(file.key + ' is not a zip file.')
Now, the problem is that this code reads the whole zip file into memory, and I’m getting a MemoryError.
Is there a more efficient way to do this, such as reading the file in chunks?
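For example, would something along these lines be a reasonable direction? This is only a rough sketch on my part, assuming the third-party smart_open library (not part of my current code) can give zipfile a seekable, streaming view of the S3 object, so that only the parts actually read get pulled into memory:

import boto3
import zipfile
from smart_open import open as s3_open  # third-party library, an assumption on my part

def lambda_handler(event, context):
    s3_client = boto3.client('s3')
    source_bucket = 'bucket1'
    target_bucket = 'bucket2'

    paginator = s3_client.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=source_bucket):
        for obj in page.get('Contents', []):
            key = obj['Key']
            if not key.endswith('.zip'):
                print(key + ' is not a zip file.')
                continue
            # smart_open exposes the S3 object as a seekable file-like stream,
            # so zipfile can read the central directory and individual members
            # without loading the whole archive into memory
            with s3_open(f's3://{source_bucket}/{key}', 'rb') as zip_stream:
                with zipfile.ZipFile(zip_stream) as z:
                    for filename in z.namelist():
                        # z.open() gives a streaming file object for one member;
                        # upload_fileobj reads it in chunks rather than all at once
                        with z.open(filename) as member:
                            s3_client.upload_fileobj(member, Bucket=target_bucket, Key=filename)

I’m not sure whether this is the right approach for Lambda’s memory limits, so any guidance on this or a better chunked alternative would be appreciated.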
Thanks.