I have a .docx file containing a table graphic that is not recognized by `doc.tables`.
Here is the file:
https://github.com/python-openxml/python-docx/files/1867861/non_readable_table.docx
The same issue was encountered here, but no answer was given. Please let me know if you have a solution.
import pandas as pd
from docx import Document
# Open the sample document and show which tables are exposed at the top level.
docx_path = "non_readable_table.docx"
doc = Document(docx_path)
print(doc.tables)
def iter_tables(block_item_container):
    """Yield every table reachable from `block_item_container`, depth-first.

    Each table found in the container's ``tables`` collection is yielded
    first; the generator then descends into that table's cells (row by row,
    cell by cell) and yields any tables nested inside them before moving on
    to the container's next table.
    """
    for table in block_item_container.tables:
        yield table
        # Walk the cells in row-major order and recurse into each one.
        cells_in_order = (cell for row in table.rows for cell in row.cells)
        for cell in cells_in_order:
            yield from iter_tables(cell)
# Convert every table in the document (nested ones included) into a
# DataFrame holding the plain text of each cell.
dfs = []
for table in iter_tables(doc):
    n_rows, n_cols = len(table.rows), len(table.columns)
    # Start from a rows x columns grid of empty strings so that cells
    # without text keep a '' placeholder in the resulting DataFrame.
    grid = [["" for _ in range(n_cols)] for _ in range(n_rows)]
    for i, row in enumerate(table.rows):
        for j, cell in enumerate(row.cells):
            text = cell.text  # read the cell text once per cell
            if text:
                # Strip newline characters so multi-line cell content
                # collapses to a single line. (Replacing the bare letter
                # 'n' would delete every "n" from the text.)
                grid[i][j] = text.replace("\n", "")
    dfs.append(pd.DataFrame(grid))
print(dfs)