I’m trying to make a Python script that goes through some of our project documents (.pdf) and updates the status/name from R to F. The original text is written in Helvetica, bold, but the new one get slapped on without being bold. I’ve tried every option I could find online, to no avail.
Note that I am not a programmer so this was made via ChatGPT, please don’t flame me 😀
import os
import shutil
import fitz # PyMuPDF
import re
def rename_and_copy_files():
base_directory = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'original')
updated_directory = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'updated')
if not os.path.exists(updated_directory):
os.makedirs(updated_directory)
font_path = "Helvetica-Bold.ttf"
if not os.path.isfile(font_path):
print(f"Font file not found: {font_path}")
return
for filename in os.listdir(base_directory):
if '_R_' in filename:
new_filename = filename.replace('_R_', '_F_')
src = os.path.join(base_directory, filename)
dst = os.path.join(updated_directory, new_filename)
print(f"Processing file: {filename}")
if filename.endswith('.pdf'):
update_pdf_text(src, dst, font_path)
else:
shutil.copy2(src, dst)
print(f"Copied and renamed: {filename} to {new_filename}")
def update_pdf_text(src, dst, font_path):
document = fitz.open(src)
for page_num in range(len(document)):
page = document[page_num]
text_instances = page.search_for("_R_")
for inst in text_instances:
rect = fitz.Rect(inst)
full_text, start_rect, end_rect = extract_full_name(page, rect)
if not full_text:
continue
updated_text = full_text.replace('_R_', '_F_')
page.draw_rect(fitz.Rect(start_rect.x0, start_rect.y0, end_rect.x1, end_rect.y1), color=(1, 1, 1),
fill=(1, 1, 1))
new_y = start_rect.y0 + 10
page.insert_text((start_rect.x0, new_y),
updated_text,
fontsize=10,
fontfile=font_path,
color=(0, 0, 0))
single_r_instances = page.search_for(" R ")
for inst in single_r_instances:
rect = fitz.Rect(inst)
page.draw_rect(rect, color=(1, 1, 1), fill=(1, 1, 1))
new_y = rect.y0 + 10
page.insert_text((rect.x0, new_y),
"F",
fontsize=10,
fontfile=font_path,
color=(0, 0, 0))
document.save(dst, garbage=4, deflate=True)
document.close()
def extract_full_name(page, rect):
full_text = ""
start_rect = rect
end_rect = rect
words = page.get_text("words")
name_pattern = re.compile(r'[A-Za-z0-9_-]+')
for word in words:
word_text = word[4]
if rect.intersects(fitz.Rect(word[:4])) and name_pattern.match(word_text):
start_rect = fitz.Rect(word[:4]) if fitz.Rect(word[:4]).x0 < start_rect.x0 else start_rect
end_rect = fitz.Rect(word[:4]) if fitz.Rect(word[:4]).x1 > end_rect.x1 else end_rect
full_text += word_text
return full_text, start_rect, end_rect
if __name__ == "__main__":
rename_and_copy_files()
print("Process finished")
I’ve tried using different versions of Helvetica bold (.otf and .ttf), and also tried using other fonts (Comic-sans just to see if it would get picked up, and it didn’t). Without the font file, the program doesn’t do anything, so it must realise it exists… just doesn’t use it.
I’m guessing the issue lies within this part:
page.insert_text((rect.x0, new_y),
"F",
fontsize=10,
fontfile=font_path,
color=(0, 0, 0))
However, I’ve ran out of ideas. Any help would be appreciated, thank you! 🙂
Luka is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.
2
As this comment solved your problem, Posting it as an answer.
Try setting the font manually, before the page.insert_text part.
page.set_font("font name", fontsize=10, fontfile=font_path)
Following the advice of @Nesi, I added fontname="helvbo"
into:
page.insert_text((start_rect.x0, new_y),
updated_text,
fontsize=10,
fontfile=font_path,
fontname="helvbo",
color=(0, 0, 0))
This solved my problem. Thanks!
Luka is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.