I’m working on a Python application that generates PDFs for exams. The exam data is fetched from a server, and any math expressions in the data are stored in MathML format. I’m having trouble inserting these MathML expressions into the PDF.
I tried converting the MathML to LaTeX first and then to an image, using this answer and matplotlib. It worked for simple expressions but not for very complex ones. I guess this is because the conversion doesn’t cover all MathML entities, since I keep seeing errors such as "Entity not defined" or "Expected token".
I know the MathML that I obtained is valid because I tried rendering it in the browser and it displays correctly.
I also searched for ways to insert the MathML into the PDF directly, to avoid the conversion, but couldn’t find any.
Here is an example I tried that failed.
import io

import matplotlib.pyplot as plt
from lxml import etree
def mathml_to_tex(mathml):
    """Convert a MathML string to LaTeX using the mmltex XSLT stylesheet.

    The stylesheet is read from ``./mathml/mmltex.xsl`` on every call.

    Args:
        mathml: MathML markup to convert.

    Returns:
        The string form of the XSLT transformation result.
    """
    stylesheet = etree.XSLT(etree.parse('./mathml/mmltex.xsl'))
    document = etree.fromstring(mathml)
    # The transform result is a tree object; str() yields its text output.
    return str(stylesheet(document))
def latex_to_image(latex_code):
    """Render a text/LaTeX string to an in-memory PNG image.

    The string is drawn centred on a 5x1 inch figure with the axes hidden,
    and the figure is saved with a tight bounding box and no padding.

    Args:
        latex_code: The text to draw, passed unchanged to ``ax.text``.

    Returns:
        An ``io.BytesIO`` containing the PNG data, rewound to offset 0.

    Raises:
        Any exception matplotlib raises while drawing or saving; the figure
        is closed before the exception propagates.
    """
    fig, ax = plt.subplots(figsize=(5, 1))
    try:
        ax.text(0.5, 0.5, latex_code, fontsize=20, ha='center', va='center')
        ax.axis('off')
        buffer = io.BytesIO()
        fig.savefig(buffer, format='png', bbox_inches='tight', pad_inches=0)
    finally:
        # Close the figure even when rendering fails (e.g. on unparsable
        # input); otherwise every failed call leaves an open figure behind.
        plt.close(fig)
    buffer.seek(0)
    return buffer
def show_tex(mathml, output_file):
    """Convert MathML to an image and write it to a file.

    Args:
        mathml: MathML markup, handed to ``mathml_to_tex``.
        output_file: Path of the file to create; opened in binary write mode.
    """
    image_stream = latex_to_image(mathml_to_tex(mathml))
    with open(output_file, "wb") as out:
        out.write(image_stream.read())
# Sample MathML input used to reproduce the failure: a square root, a
# parenthesised 3x3 table, and nested functions that use fraktur/script
# variants and several non-ASCII operator symbols.
mathml_data = '''
<math xmlns="http://www.w3.org/1998/Math/MathML">
<msqrt>
<mi>x</mi>
<mo> </mo>
<mo>+</mo>
<mo> </mo>
<mi>y</mi>
</msqrt>
<mo> </mo>
<mo>+</mo>
<mo> </mo>
<mfenced>
<mtable>
<mtr>
<mtd>
<mi>i</mi>
</mtd>
<mtd>
<mi>j</mi>
</mtd>
<mtd>
<mi>k</mi>
</mtd>
</mtr>
<mtr>
<mtd>
<mn>5</mn>
</mtd>
<mtd>
<mn>7</mn>
</mtd>
<mtd>
<mn>8</mn>
</mtd>
</mtr>
<mtr>
<mtd>
<mi>θ</mi>
</mtd>
<mtd>
<mi>η</mi>
</mtd>
<mtd>
<mi>x</mi>
</mtd>
</mtr>
</mtable>
</mfenced>
<mo>×</mo>
<mo>∯</mo>
<msub>
<mi>log</mi>
<mn>2</mn>
</msub>
<mfenced>
<mrow>
<mo>∂</mo>
<mi>sin</mi>
<mfenced>
<mrow>
<mi>ln</mi>
<mfenced>
<munderover>
<mo>∑</mo>
<mi mathvariant="fraktur">B</mi>
<mi mathvariant="script">B</mi>
</munderover>
</mfenced>
</mrow>
</mfenced>
</mrow>
</mfenced>
<mo> </mo>
<mo>∈</mo>
<mo> </mo>
<mi mathvariant="normal">ℝ</mi>
<mo> </mo>
</math>
'''
# Run the conversion on the sample and write the result to output.png.
show_tex(mathml_data, 'output.png')
ValueError:
\sqrt{x + y} + \left(\begin{array}{ccc}i& j& k\\ 5& 7& 8\\ \theta & \eta & x\end{array}\right)\times ∯{\mathrm{log}}_{2}\left(\partial \mathrm{sin}\left(\mathrm{ln}\left(\sum _{\mathfrak{B}}^{\mathcal{B}}\right)\right)\right) \in \mathrm{ℝ}
^
ParseSyntaxException: Expected token, found '' (at char 22), (line:1, col:23)