I’m trying to convert the pdf to images on the ubuntu server. However it fails without any specific error. I tested in on ubuntu server 20.04 and 22.04 as well as WSL ubuntu 22.04. The error is the same. it is working fine on Windows.
I have proper permissions setup and poppler-tools are installed and properly configured.
My Function Code:
def convert_pdf_to_images(pdf_path, output_folder):
image_paths = []
output_folder = os.path.join('files', output_folder)
# check if output folder exists and not empty
if os.path.exists(output_folder) and os.listdir(output_folder):
logger('Output folder exists and not empty, returning image paths from folder')
image_paths = [os.path.join(output_folder, f) for f in os.listdir(output_folder) if os.path.isfile(os.path.join(output_folder, f))]
return image_paths
images = convert_from_path(pdf_path, dpi=300, fmt='png', poppler_path=POPPLER_PATH)
logger('PDF converted to images')
logger('Creating output folder')
if not os.path.exists(output_folder):
try:
logger('Output folder does not exist')
os.makedirs(output_folder)
logger('Output folder created')
except OSError as e:
logger('Failed to create output folder. {}'.format(e))
else:
try:
logger('Output folder exists')
shutil.rmtree(output_folder)
logger('Output folder deleted')
os.makedirs(output_folder)
logger('Output folder created')
except OSError as e:
logger('Failed to create output folder. {}'.format(e))
for i, image in enumerate(images):
try:
image_path = os.path.join(output_folder, f'{i + 1}.png')
with open(image_path, 'wb') as out_file: # Use context manager
logger(f'Image file opened for creation: {image_path}')
image.save(out_file, 'PNG')
logger(f'Image saved: {image_path}')
image_paths.append(image_path) # Track successful image paths
except OSError as e:
return jsonify({'message': 'Failed to save image', 'error': str(e)}), 500
except Exception as e:
return jsonify({'message': 'Failed to convert PDF to images', 'error': str(e)}), 500
# resize the images
for image_path in image_paths:
logger('Resizing image: ' + image_path)
resize_image(image_path)
return image_paths
This is the error which i get on all versions of Ubuntu i tested above:
[2024-09-03 08:57:57 +0000] [31837] [ERROR] Error handling request /api/ask
Traceback (most recent call last):
File "/home/ubuntu/docugpt/venv/lib/python3.10/site-packages/gunicorn/workers/sync.py", line 135, in handle
self.handle_request(listener, req, client, addr)
File "/home/ubuntu/docugpt/venv/lib/python3.10/site-packages/gunicorn/workers/sync.py", line 178, in handle_request
respiter = self.wsgi(environ, resp.start_response)
File "/home/ubuntu/docugpt/venv/lib/python3.10/site-packages/flask/app.py", line 1498, in call
return self.wsgi_app(environ, start_response)
File "/home/ubuntu/docugpt/venv/lib/python3.10/site-packages/flask/app.py", line 1473, in wsgi_app
response = self.full_dispatch_request()
File "/home/ubuntu/docugpt/venv/lib/python3.10/site-packages/flask/app.py", line 880, in full_dispatch_request
rv = self.dispatch_request()
File "/home/ubuntu/docugpt/venv/lib/python3.10/site-packages/flask/app.py", line 865, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args) # type: ignore[no-any-return]
File "/home/ubuntu/docugpt/venv/lib/python3.10/site-packages/flask_jwt_extended/view_decorators.py", line 170, in decorator
return current_app.ensure_sync(fn)(*args, **kwargs)
File "/home/ubuntu/docugpt/app.py", line 576, in ask_question
content, error = get_openai_content_from_file(file_id, file_path, file_extension)
File "/home/ubuntu/docugpt/app.py", line 310, in get_openai_content_from_file
image_paths = convert_pdf_to_images(file_path, file_id)
File "/home/ubuntu/docugpt/app.py", line 98, in convert_pdf_to_images
images = convert_from_path(pdf_path, dpi=300, fmt='png', poppler_path=POPPLER_PATH)
File "/home/ubuntu/docugpt/venv/lib/python3.10/site-packages/pdf2image/pdf2image.py", line 250, in convert_from_path
data, err = proc.communicate(timeout=timeout)
File "/usr/lib/python3.10/subprocess.py", line 1154, in communicate
stdout, stderr = self._communicate(input, endtime, timeout)
File "/usr/lib/python3.10/subprocess.py", line 2021, in _communicate
ready = selector.select(timeout)
File "/usr/lib/python3.10/selectors.py", line 416, in select
fd_event_list = self._selector.poll(timeout)
File "/home/ubuntu/docugpt/venv/lib/python3.10/site-packages/gunicorn/workers/base.py", line 203, in handle_abort
sys.exit(1)
SystemExit: 1
[2024-09-03 08:57:57 +0000] [31837] [INFO] Worker exiting (pid: 31837)
[2024-09-03 08:57:58 +0000] [31850] [INFO] Booting worker with pid: 31850
0