# As written, the code works, but only with two threads. If I add a third one, processing stalls and eventually times out. I don't know whether DocumentProcessorServiceAsyncClient is limited to two threads at a time, although most likely I am doing something wrong.
# Extension -> MIME type. Building the type as "image/<extension>" produces
# non-standard values such as "image/jpg" or "image/tif", so the common
# document/image extensions are mapped explicitly.
_MIME_BY_EXTENSION = {
    'pdf': 'application/pdf',
    'jpg': 'image/jpeg',
    'jpeg': 'image/jpeg',
    'tif': 'image/tiff',
    'tiff': 'image/tiff',
    'png': 'image/png',
    'gif': 'image/gif',
    'bmp': 'image/bmp',
    'webp': 'image/webp',
}


def _mime_type_for(file_path):
    """Return the MIME type implied by the extension of *file_path*.

    The extension is compared case-insensitively. Unknown extensions fall
    back to ``image/<extension>`` (lower-cased), which is what this code
    always produced for non-PDF files.
    """
    extension = str(file_path).rsplit('.', 1)[-1].lower()
    return _MIME_BY_EXTENSION.get(extension, 'image/' + extension)


async def get_text_ocr(file_path):
    """Send one file to a Document AI processor version and return its text.

    Args:
        file_path: Path of the PDF or image to process. The whole file is
            read into memory and sent as a raw document.

    Returns:
        The ``text`` of the processed document, or ``None`` if anything
        fails (the exception is printed, not raised).

    NOTE(review): a new DocumentProcessorServiceAsyncClient is built on every
    call. Combined with one event loop per call in several threads, this may
    be related to the reported stalls - TODO confirm; consider sharing one
    client/loop or using the synchronous client inside worker threads.
    """
    try:
        mime_type = _mime_type_for(file_path)
        print(mime_type)

        client = documentai.DocumentProcessorServiceAsyncClient(
            client_options=ClientOptions(api_endpoint=f'{location}-{endpoint}')
        )
        name = client.processor_version_path(
            project_id, location, processor_id, processor_version
        )

        with open(file_path, 'rb') as pag:
            image_content = pag.read()

        raw_document = documentai.RawDocument(
            content=image_content, mime_type=mime_type
        )
        request = documentai.ProcessRequest(name=name, raw_document=raw_document)
        response = await client.process_document(request=request)
        return response.document.text
    except Exception as exp:
        # Deliberate best-effort: callers receive None when OCR fails.
        print(exp)
        return None
def procesar_documentos(archivos_leer, Carpeta_destino, hilo_cc):
    """Run OCR on every file and move each finished folder to the destination.

    Files are expected in folder order: whenever the folder component of the
    path changes, the folder that was just completed is moved to
    ``Carpeta_destino``; the last folder is moved after the loop ends.

    Args:
        archivos_leer: Sequence of '/'-separated file paths. The folder name
            is taken from path component index 5, so every path needs at
            least six components (shorter paths raise IndexError).
        Carpeta_destino: Destination directory handed to the shell ``move``
            command.
        hilo_cc: Label of the calling thread; not used by the function body.
    """
    if not archivos_leer:
        # Nothing to process and no folder to move.
        print('Finalizado')
        return

    def _base_y_carpeta(ruta):
        # Split a path into its first five components (with trailing '/')
        # and the folder name stored at component index 5.
        partes = str(ruta).split('/')
        return '/'.join(partes[:5]) + '/', partes[5]

    # Track the base path together with the folder so the final move works
    # even when the folder never changes during the batch.
    base_igual, carpeta_igual = _base_y_carpeta(archivos_leer[0])

    for archivo in archivos_leer:
        base_actual, carpeta_actual = _base_y_carpeta(archivo)
        if carpeta_actual != carpeta_igual:
            # The previous folder is complete: move it using its own base path.
            system(f'move "{base_igual + carpeta_igual}" "{Carpeta_destino}"')
            base_igual, carpeta_igual = base_actual, carpeta_actual
        mija(archivo)

    system(f'move "{base_igual + carpeta_igual}" "{Carpeta_destino}"')
    print('Finalizado')
def mija(file_path):
    """Synchronously run ``get_text_ocr`` for *file_path* and return its result.

    Each call goes through ``asyncio.run``, so the coroutine is executed to
    completion before this function returns.
    """
    corutina = get_text_ocr(file_path)
    return asyncio.run(corutina)
# One worker thread per batch of files; each runs procesar_documentos on its
# own slice of `hilos`. (The typographic quotes of the pasted original were a
# syntax error and are replaced with plain ASCII quotes.)
hilo1 = threading.Thread(
    target=procesar_documentos,
    args=(hilos[0], Carpeta_destino, "hilo_1"),
    name="Hilo 1",
)
hilo2 = threading.Thread(
    target=procesar_documentos,
    args=(hilos[1], Carpeta_destino, "hilo_2"),
    name="Hilo 2",
)
# Deseo poder utilizar como mínimo 4 hilos para procesar documentos, con 4 DocumentProcessorServiceAsyncClient al mismo tiempo.
Jeison Jose Bolano Pabon is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.