I am using pypdfium2 to read text from a PDF and then move the file to a different folder, but I keep getting an error telling me that the file is open and can’t be moved.
How do I properly close the document so that it can be moved? I passed True for the autoclose parameter, but it made no difference.
I will add that even after stopping my program I still can’t move the file in File Explorer; I need to close Spyder entirely before I can move these files.
import time
import watchdog.events
import watchdog.observers
import shutil
import pypdfium2 as pdfium
from pypdf import PdfReader, PdfWriter
class Handler(watchdog.events.PatternMatchingEventHandler):
    """Split every newly created PDF into one output file per detected ID.

    For each page, the text following the marker "A285" is inspected to
    extract an ID. Consecutive pages sharing an ID are written to
    "<ID>.pdf" and moved to the processed-files folder.
    """

    def __init__(self):
        """Register the handler for ``*.pdf`` files only (case-insensitive)."""
        super().__init__(
            patterns=['*.pdf'],
            ignore_directories=True,
            case_sensitive=False,
        )

    def id_parsing(self, path, index):
        """Return the ID found on page ``index`` of the PDF at ``path``.

        Returns:
            The longest whitespace-separated token in the 12 characters that
            start 31 characters after the first "A285" match; ``""`` when the
            marker is absent from the page; ``"-1"`` when anything goes wrong
            while reading the page (including an out-of-range ``index``).

        Raises:
            Whatever ``pdfium.PdfDocument`` raises if the file cannot be
            opened; only errors while reading the page are mapped to "-1".
        """
        page_id = ""
        # Opened from a file path, pdfium keeps the file open until close()
        # is called. The ``autoclose`` argument only concerns buffer inputs,
        # so it cannot release a path-based document; close explicitly below.
        pdf = pdfium.PdfDocument(path)
        page = textpage = searcher = None
        try:
            page = pdf[index]
            textpage = page.get_textpage()
            searcher = textpage.search("A285")
            match = searcher.get_next()  # (char_index, char_count) or None
            if match is not None:
                start = match[0]
                window = textpage.get_text_bounded()[start + 31:start + 43]
                # max() raises ValueError on an empty split -> handled as "-1".
                page_id = str(max(window.split(), key=len))
        except Exception:
            page_id = "-1"
        finally:
            # Release children before the document so the OS-level file
            # handle is dropped and the PDF can be moved afterwards.
            for handle in (searcher, textpage, page, pdf):
                if handle is not None:
                    handle.close()
        return page_id

    def on_created(self, event):
        """Split the created PDF into per-ID files and move them to ``dest``.

        Pages whose ID cannot be found inherit the ID of the previous page.
        """
        print("Watchdog received created event - %s." % event.src_path)
        reader = PdfReader(event.src_path)

        # NOTE(review): separators in these paths were reconstructed; confirm
        # they match the real folders.
        source = r'C:\Users\jkaplan\.spyder-py3'
        dest = r'C:\Users\jkaplan\Documents\Test\Processed Files'

        last_id = ""
        start_index = 0
        # Iterate one index past the last page on purpose: id_parsing returns
        # "-1" for the out-of-range index, which differs from the previous ID
        # and therefore flushes the final group of pages.
        for i in range(len(reader.pages) + 1):
            page_id = self.id_parsing(event.src_path, i)
            if page_id == "":
                page_id = last_id
            print("ID:" + page_id)
            if page_id != last_id and last_id != "":
                writer = PdfWriter()
                output_name = last_id + '.pdf'
                for page_number in range(start_index, i):
                    writer.add_page(reader.pages[page_number])
                # Write to the same location the move reads from, instead of
                # relying on the current working directory being ``source``.
                output_path = source + '\\' + output_name
                with open(output_path, "wb") as f:
                    writer.write(f)
                start_index = i
                shutil.move(output_path, dest)
            last_id = page_id

        dest2 = r'C:\Users\jkaplan\Documents\Test\Completed'
        # NOTE(review): moving the source PDF was disabled in the original.
        # With id_parsing now closing its pdfium handles this should no longer
        # be blocked by an open document -- re-enable once confirmed.
        #shutil.move(event.src_path, dest2)
if __name__ == "__main__":
    # Folder watched (non-recursively) for newly created PDF files.
    # NOTE(review): path separators were reconstructed; without them
    # "C:Users..." is a drive-relative path -- confirm the real folder.
    src_path = r"C:\Users\jkaplan\Documents\Test"
    observer = watchdog.observers.Observer()
    event_handler = Handler()
    observer.schedule(event_handler, path=src_path, recursive=False)
    observer.start()
    try:
        # Keep the main thread alive; the observer runs in its own thread.
        while True:
            time.sleep(15)
    except KeyboardInterrupt:
        observer.stop()
        print("Observer Stopped")
    # Wait for the observer thread to finish before exiting.
    observer.join()
I tried using a `with open(...)` statement, but I got errors from pdfium and it didn’t make any difference.