import boto3
from botocore.exceptions import ClientError
class TextractWrapper:
    """Encapsulates Amazon Textract functions."""

    def __init__(self, textract_client, s3_resource, sqs_resource):
        """
        Store the AWS handles used by the wrapper.

        :param textract_client: A Boto3 Textract client.
        :param s3_resource: A Boto3 Amazon S3 resource.
        :param sqs_resource: A Boto3 Amazon SQS resource.
        """
        self.textract_client = textract_client
        self.s3_resource = s3_resource
        self.sqs_resource = sqs_resource

    def analyze_file(self, bucket, file_name):
        """
        Start a document analysis job for a file stored in an Amazon S3 bucket.

        The job is requested with the TABLES, FORMS, SIGNATURES and LAYOUT
        feature types. This method only starts the job and returns its id.

        NOTE: this is a document *analysis* job, not a text detection job.
        Per the Boto3 Textract documentation, its results are read with
        ``get_document_analysis`` (following ``NextToken`` across pages);
        confirm that the code consuming the returned JobId does so.

        :param bucket: The name of the S3 bucket.
        :param file_name: The name (object key) of the PDF file.
        :return: The JobId of the document analysis job.
        :raises ClientError: If the job could not be started; the error is
                             printed and then re-raised unchanged.
        """
        try:
            response = self.textract_client.start_document_analysis(
                DocumentLocation={
                    "S3Object": {"Bucket": bucket, "Name": file_name}
                },
                FeatureTypes=["TABLES", "FORMS", "SIGNATURES", "LAYOUT"],
            )
            print(f"Started document analysis job with id: {response['JobId']}")
        except ClientError as e:
            print(f"Couldn't start document analysis job: {e}")
            raise
        else:
            return response["JobId"]
I am using this code and it works well with other documents, but with one document it isn't working. The strange part is that it works when I upload that document to AWS Textract directly. But I want to build an automation with Lambda and S3.
I am trying to return tables using boto3, but it just returns LINES and WORDS.