Am trying to populate data from csv into a pdf form using PYPDF2 library. However, getting following error when using the latest version of PYPDF2
TypeError: sequence indices must be integers
import pandas as pd
import os
from PyPDF2 import PdfWriter, PdfReader
from PyPDF2.generic import BooleanObject, NameObject, IndirectObject
def set_need_appearances_writer(writer: PdfWriter):
try:
catalog = writer._root_object
if "/AcroForm" not in catalog:
writer._root_object.update({
NameObject("/AcroForm"): IndirectObject(len(writer._objects), 0, writer)})
need_appearances = NameObject("/NeedAppearances")
writer._root_object["/AcroForm"][need_appearances] = BooleanObject(True)
return writer
except Exception as e:
print('set_need_appearances_writer() catch : ', repr(e))
return writer
if __name__ == '__main__':
csv_filename = "filled-w9.csv"
pdf_filename = "form-w9.pdf"
csvin = os.path.normpath(os.path.join(os.getcwd(), 'in', csv_filename))
pdfin = os.path.normpath(os.path.join(os.getcwd(), 'in', pdf_filename))
pdfout = os.path.normpath(os.path.join(os.getcwd(), 'out'))
data = pd.read_csv(csvin)
reader = PdfReader(open(pdfin, "rb"), strict=False)
if "/AcroForm" in reader.trailer["/Root"]:
reader.trailer["/Root"]["/AcroForm"].update(
{NameObject("/NeedAppearances"): BooleanObject(True)})
pdf_fields = [str(x) for x in reader.get_fields().keys()] # List of all pdf field names
csv_fields = data.columns.tolist()
i = 0 # Filename numerical prefix
for j, rows in data.iterrows():
i += 1
writer = PdfWriter()
set_need_appearances_writer(writer)
if "/AcroForm" in writer._root_object:
writer._root_object["/AcroForm"].update(
{NameObject("/NeedAppearances"): BooleanObject(True)})
# Key = pdf_field_name : Value = csv_field_value
field_dictionary_1 = {"f1_01[0]": str(rows['f1_01[0]']),
"f1_02[0]": (rows['f1_02[0]']),
"c1_1[0]": (rows['c1_1[0]']),
"c1_1[1]": (rows['c1_1[1]']),
"c1_1[2]": (rows['c1_1[2]']),
"c1_1[3]": (rows['c1_1[3]']),
"c1_1[4]": (rows['c1_1[4]']),
"c1_1[5]": (rows['c1_1[5]']),
"f1_03[0]": (rows['f1_03[0]']),
"c1_1[6]": (rows['c1_1[6]']),
"f1_04[0]": (rows['f1_04[0]']),
"c1_2[0]": (rows['c1_2[0]']),
"Boxes3a-b_ReadOrder[0]": (rows['Boxes3a-b_ReadOrder[0]']),
"f1_05[0]": (rows['f1_05[0]']),
"f1_06[0]": (rows['f1_06[0]']),
"f1_07[0]": (rows['f1_07[0]']),
"f1_08[0]": (rows['f1_08[0]']),
"Address_ReadOrder[0]": (rows['Address_ReadOrder[0]']),
"f1_09[0]": (rows['f1_09[0]']),
"f1_10[0]": (rows['f1_10[0]']),
"f1_11[0]": (rows['f1_11[0]']),
"f1_12[0]": (rows['f1_12[0]']),
"f1_13[0]": (rows['f1_13[0]']),
"f1_14[0]": (rows['f1_14[0]']),
"f1_15[0]": (rows['f1_15[0]']),
}
temp_out_dir = os.path.normpath(os.path.join(pdfout, str(i) + 'out.pdf'))
page = writer.pages[reader.pages[0]]
writer.update_page_form_field_values(writer.get_page(0), field_dictionary_1)
# pdf2.addPage(pdf.getPage(1))
# pdf2.addPage(pdf.getPage(2))
# pdf2.addPage(pdf.getPage(3))
# pdf2.addPage(pdf.getPage(4))
# pdf2.addPage(pdf.getPage(5))
outputStream = open(temp_out_dir, "wb")
writer.write(outputStream)
outputStream.close()
print(f'Process Complete: {i} PDFs Processed!')
changed line 75 in my code from this page=writer.pages(reader.pages(0))to this page=writer.pages[reader.pages[0]], still does not work
New contributor
Grace is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.