`i am trying to convert pdf file to svg with correct formating
i am using the fitz pymupdf library
the text is formatted correctly but i cant adjust the image
“this is my first time working with pdf or svg”
this is the code
import fitz # PyMuPDF
import base64
import zipfile
def pdf_to_svg_with_text_and_images(pdf_path, output_zip_path):
document = fitz.open(pdf_path)
num_pages = len(document)
with zipfile.ZipFile(output_zip_path, 'w') as zipf:
for page_index in range(num_pages):
page = document.load_page(page_index)
svg_header = '''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
version="1.1" id="main1" width="{width}" height="{height}">
<g>
'''.format(width=int(page.rect.width), height=int(page.rect.height))
svg_content = ""
current_y = 0 # Starting Y position for text
text_instances = page.get_text("dict")["blocks"]
for instance in text_instances:
if "lines" in instance:
for line in instance["lines"]:
for span in line["spans"]:
svg_content += f'<text style="fill:#000000;font-family:
{span["font"]};font-size:{span["size"]}px;" fill-opacity="1" x="
{span["bbox"][0]}" y="{span["bbox"][3]}">{span["text"]}</text>n'
current_y = max(current_y, span["bbox"][3])
image_list = page.get_images(full=True)
if image_list:
for img_index, img in enumerate(image_list):
xref = img[0]
base_image = document.extract_image(xref)
image_bytes = base_image["image"]
img_width, img_height = 400, 400
img_x, img_y = img[1], img[2]
img_base64 = base64.b64encode(image_bytes).decode("utf-8")
svg_content += f'<image x="{img_x}" y="{img_y}" width="{img_width}"
height="
{img_height}" xlink:href="data:image/{base_image["ext"]};base64,
{img_base64}"/>n'
svg_footer = '''
</g>
</svg>'''
if svg_content:
svg_filename = f'page{page_index + 1}.svg'
full_svg_content = svg_header + svg_content + svg_footer
zipf.writestr(svg_filename, full_svg_content)
print(f"SVG file {svg_filename} added to zip")
document.close()
input_pdf_path = 'testing_new.pdf'
output_zip_path = 'output_svgs.zip'
pdf_to_svg_with_text_and_images(input_pdf_path, output_zip_path)`
`
New contributor
ahmad tayyab is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.