Thiết kế website giá rẻ

Question

In my python code I have added function process_docx to annotate header and paragraphs and also one more function annotate_images to annotate images.

I find a problem in the output doc that header (annotated in yellow) isn’t getting annotated and images (should be annotated in light blue) isn’t getting annotated. Only the paragraphs are annotated in grey.

In the input word document, I had added 1 header using insert header settings, 2 paragraphs and 1 image that was taken from web. In the output word document, only the paragraphs are getting annotated.

Here’s the input document I had used.

Here’s the output document.

<code>from docx import Document

from docx.shared import RGBColor

from docx.oxml import OxmlElement

from docx.oxml.ns import qn

</code>

<code>from docx import Document from docx.shared import RGBColor from docx.oxml import OxmlElement from docx.oxml.ns import qn </code>

from docx import Document
from docx.shared import RGBColor
from docx.oxml import OxmlElement
from docx.oxml.ns import qn

<code>def annotate_images(doc):

for rel in doc.part.rels.values():

if "image" in rel.target_ref:

image = rel.target_part

for paragraph in doc.paragraphs:

for elem in paragraph._element.iter():

if elem.tag.endswith('}inline'):

if elem.attrib.get(qn('r:embed')) == rel.rel_id:

set_paragraph_bg_color(paragraph, 'ADD8E6') # light blue

break

</code>

<code>def annotate_images(doc): for rel in doc.part.rels.values(): if "image" in rel.target_ref: image = rel.target_part for paragraph in doc.paragraphs: for elem in paragraph._element.iter(): if elem.tag.endswith('}inline'): if elem.attrib.get(qn('r:embed')) == rel.rel_id: set_paragraph_bg_color(paragraph, 'ADD8E6') # light blue break </code>

def annotate_images(doc):
    for rel in doc.part.rels.values():
        if "image" in rel.target_ref:
            image = rel.target_part
            for paragraph in doc.paragraphs:
                for elem in paragraph._element.iter():
                    if elem.tag.endswith('}inline'):
                        if elem.attrib.get(qn('r:embed')) == rel.rel_id:    
                            set_paragraph_bg_color(paragraph, 'ADD8E6') # light blue
                            break

<code>def process_docx(input_path, output_path):

doc = Document(input_path)

for paragraph in doc.paragraphs:

if paragraph.style.name.startswith('Heading'):

set_paragraph_bg_color(paragraph, 'FFFF00') # Yellow

elif paragraph.text.strip(): #Ensures non-empty paragraphs

set_paragraph_bg_color(paragraph, 'D3D3D3') # Light grey

annotate_images(doc)

doc.save(output_path)

</code>

<code>def process_docx(input_path, output_path): doc = Document(input_path) for paragraph in doc.paragraphs: if paragraph.style.name.startswith('Heading'): set_paragraph_bg_color(paragraph, 'FFFF00') # Yellow elif paragraph.text.strip(): #Ensures non-empty paragraphs set_paragraph_bg_color(paragraph, 'D3D3D3') # Light grey annotate_images(doc) doc.save(output_path) </code>

def process_docx(input_path, output_path):
    doc = Document(input_path)

    for paragraph in doc.paragraphs:
        if paragraph.style.name.startswith('Heading'):
            set_paragraph_bg_color(paragraph, 'FFFF00') # Yellow
        elif paragraph.text.strip(): #Ensures non-empty paragraphs
            set_paragraph_bg_color(paragraph, 'D3D3D3') # Light grey

    annotate_images(doc)
    
    doc.save(output_path)

I’ll provide the whole code if you guys didn’t understand. I see there is a sitewide rule for minimal, reproducible example hence I asked my problem concisely. I also asked this question in codeproject but to no avail.

I couldn’t do anything much about the header and for the image annotation I tried this code by adding hashing technique but it didn’t work.

<code>import hashlib

def annotate_images(doc):

for rel in doc.part.rels.values():

if "image" in rel.target_ref:

image = rel.target_part

image_hash = hashlib.sha256(image.blob).hexdigest()

for paragraph in doc.paragraphs:

paragraph_text_hash = hashlib.sha256(paragraph.text.encode('utf-8')).hexdigest()

if image_hash == paragraph_text_hash:

annotate_paragraph(paragraph, 'ADD8E6')

</code>

<code>import hashlib def annotate_images(doc): for rel in doc.part.rels.values(): if "image" in rel.target_ref: image = rel.target_part image_hash = hashlib.sha256(image.blob).hexdigest() for paragraph in doc.paragraphs: paragraph_text_hash = hashlib.sha256(paragraph.text.encode('utf-8')).hexdigest() if image_hash == paragraph_text_hash: annotate_paragraph(paragraph, 'ADD8E6') </code>

import hashlib

def annotate_images(doc):
    for rel in doc.part.rels.values():
        if "image" in rel.target_ref:
            image = rel.target_part
            image_hash = hashlib.sha256(image.blob).hexdigest()
            for paragraph in doc.paragraphs:
                paragraph_text_hash = hashlib.sha256(paragraph.text.encode('utf-8')).hexdigest()
                if image_hash == paragraph_text_hash:
                    annotate_paragraph(paragraph, 'ADD8E6')

Thiết kế website giá rẻ

Danh mục

How do I get the annotation for header and image done correctly?