I’m very new to coding so any help is appreciated greatly!
I’ve written a python script that’s suppose to look through the JPG images in a PDF file and swap them for png equivalents that I have in a directory on my computer. Basically the pngs are just the same as the JPG image but with the white backgrounds removed.
The script does successfully remove the original jpg’s and swap in the png equivalent, but unfortunately the png images end up not being the same scale/size as the original jpg and they don’t get placed in the same spot that the original JPG was. Any idea what I’m doing wrong?
I’m running it from terminal on a Macbook pro.
Thank you for your help!
import fitz # PyMuPDF
from PIL import Image
import os
from io import BytesIO
# Path to the PDF file
pdf_path = '/example/example/example.pdf'
# Path to the directory containing PNG files
png_dir = '/Users/example/example'
# Output PDF path
output_pdf_path = '/Users/example/example'
# Open the PDF
pdf_document = fitz.open(pdf_path)
# Iterate through the pages
for page_num in range(len(pdf_document)):
page = pdf_document.load_page(page_num)
images = page.get_images(full=True)
for img_index, img in enumerate(images):
xref = img[0]
# Get the image information
base_image = pdf_document.extract_image(xref)
image_name = f"img_{xref}"
image_ext = base_image["ext"]
# Generate file names for extracted and replacement images
old_img_path = f"{image_name}.{image_ext}"
new_img_path = os.path.join(png_dir, f"{image_name}.png")
# If a corresponding PNG file exists, replace the image
if os.path.exists(new_img_path):
print(f"Replacing {old_img_path} with {new_img_path}")
# Load the new image
image_obj = Image.open(new_img_path)
# Get the dimensions of the new image
new_width, new_height = image_obj.size
# Calculate the position to insert the new image
x, y = img[2], img[3] # Top-left corner coordinates of the original image
rect = fitz.Rect(x, y, x + new_width, y + new_height)
# Convert the image to bytes for insertion
image_bytes = BytesIO()
image_obj.save(image_bytes, format='PNG')
image_bytes = image_bytes.getvalue()
# Insert the new image in the same position
page.insert_image(rect, stream=image_bytes)
# Remove the old image
page.delete_image(xref)
else:
print(f"No corresponding PNG found for {old_img_path}, skipping...")
# Save the updated PDF
pdf_document.save(output_pdf_path)
pdf_document.close()
aurora912 is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.