This is my code:
from requests_ntlm import HttpNtlmAuth
import requests
from io import BytesIO
import pandas as pd
import openpyxl
auth = HttpNtlmAuth(username = 'userid',
password = '***********')
url = 'https://smchcn-my.sharepoint.com/personal/Documents/TestXLWorkbook1.xlsx'
responseObject = requests.get(url, auth = auth)
file = BytesIO(responseObject.content)
df = pd.read_excel(file, engine='openpyxl')
…and here is the error message I am getting:
Python 3.12.3 (tags/v3.12.3:f6650f9, Apr 9 2024, 14:05:25) [MSC v.1938 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license()" for more information.
= RESTART: C:reposHealthcare-BluebookopenSharepointExcelFile.py
Traceback (most recent call last):
File "C:reposHealthcare-BluebookopenSharepointExcelFile.py", line 15, in <module>
df = pd.read_excel(file, engine='openpyxl')
File "C:UsersmwestleyAppDataLocalProgramsPythonPython312Libsite-packagespandasioexcel_base.py", line 495, in read_excel
io = ExcelFile(
File "C:UsersmwestleyAppDataLocalProgramsPythonPython312Libsite-packagespandasioexcel_base.py", line 1567, in __init__
self._reader = self._engines[engine](
File "C:UsersmwestleyAppDataLocalProgramsPythonPython312Libsite-packagespandasioexcel_openpyxl.py", line 553, in __init__
super().__init__(
File "C:UsersmwestleyAppDataLocalProgramsPythonPython312Libsite-packagespandasioexcel_base.py", line 573, in __init__
self.book = self.load_workbook(self.handles.handle, engine_kwargs)
File "C:UsersmwestleyAppDataLocalProgramsPythonPython312Libsite-packagespandasioexcel_openpyxl.py", line 572, in load_workbook
return load_workbook(
File "C:UsersmwestleyAppDataLocalProgramsPythonPython312Libsite-packagesopenpyxlreaderexcel.py", line 346, in load_workbook
reader = ExcelReader(filename, read_only, keep_vba,
File "C:UsersmwestleyAppDataLocalProgramsPythonPython312Libsite-packagesopenpyxlreaderexcel.py", line 123, in __init__
self.archive = _validate_archive(fn)
File "C:UsersmwestleyAppDataLocalProgramsPythonPython312Libsite-packagesopenpyxlreaderexcel.py", line 95, in _validate_archive
archive = ZipFile(filename, 'r')
File "C:UsersmwestleyAppDataLocalProgramsPythonPython312Libzipfile__init__.py", line 1349, in __init__
self._RealGetContents()
File "C:UsersmwestleyAppDataLocalProgramsPythonPython312Libzipfile__init__.py", line 1416, in _RealGetContents
raise BadZipFile("File is not a zip file")
zipfile.BadZipFile: File is not a zip file
My input file “TestXLWorkbook1.xlsx” is a valid Excel spreadsheet file which opens fine in Excel.
Why is my Python code expecting the input file here to be a zip file?