I am using the Microsoft Graph API in Python to upload a large .zip file (approximately 90 GB) to OneDrive. However, when uploading the file in chunks, the process stops with a 503 Server Error after uploading about 3.88% of the file. Here’s the relevant portion of my code and the error message:
import requests
from msal import PublicClientApplication
from dotenv import load_dotenv
import os
import time
# Load environment variables
load_dotenv()
# Configuration from environment variables
CLIENT_ID = os.getenv('CLIENT_ID')
CLIENT_SECRET = os.getenv('CLIENT_SECRET') # This might not be necessary for public clients
USER_ID = os.getenv('USER_ID')
authority_url = 'https://login.microsoftonline.com/consumers'
scopes = ['Files.ReadWrite.All']
app = PublicClientApplication(CLIENT_ID, authority=authority_url)
# Redirect URI after authentication
redirect_uri = 'http://localhost:8000/callback'
url = app.get_authorization_request_url(scopes, redirect_uri=redirect_uri)
print("Please go to this URL and sign-in:", url)
# After sign-in, receive a callback with a code
code = input("Enter the code you received: ")
result = app.acquire_token_by_authorization_code(code, scopes=scopes, redirect_uri=redirect_uri)
if 'access_token' in result:
access_token = result['access_token']
print("Access token acquired.")
else:
print(result.get('error'))
print(result.get('error_description'))
exit(1)
def create_upload_session(access_token, file_name):
headers = {
'Authorization': 'Bearer ' + access_token,
'Content-Type': 'application/json'
}
url = f'https://graph.microsoft.com/v1.0/users/{USER_ID}/drive/root:/{file_name}:/createUploadSession'
response = requests.post(url, headers=headers)
response.raise_for_status()
return response.json()
def upload_file_in_chunks(upload_url, file_path, chunk_size=327680, max_retries=99999):
with open(file_path, 'rb') as file:
file_size = os.path.getsize(file_path)
for i in range(0, file_size, chunk_size):
chunk_data = file.read(chunk_size)
headers = {
'Content-Length': str(len(chunk_data)),
'Content-Range': f'bytes {i}-{i + len(chunk_data) - 1}/{file_size}'
}
retries = 0
while retries < max_retries:
try:
response = requests.put(upload_url, headers=headers, data=chunk_data, timeout=99999)
response.raise_for_status()
uploaded = min(i + len(chunk_data), file_size)
print(f'Uploaded {uploaded} out of {file_size} bytes ({uploaded * 100 / file_size:.2f}%)')
break
except (requests.exceptions.ConnectTimeout, requests.exceptions.ConnectionError) as e:
retries += 1
print(f"Retrying ({retries}/{max_retries}) due to: {e}")
time.sleep(2 ** retries) # Exponential backoff
if retries == max_retries:
raise
root_directory = '/media/user/backup/Datasets/dt/included'
onedrive_folder = 'dt'
# Walk through the directory structure
for dirpath, dirnames, filenames in os.walk(root_directory):
for file in filenames:
file_path = os.path.join(dirpath, file)
# Create a relative path
relative_path = os.path.relpath(dirpath, root_directory)
# Construct the OneDrive path
if relative_path == '.':
onedrive_path = f'{onedrive_folder}/{file}'
else:
onedrive_path = f'{onedrive_folder}/{relative_path}/{file}'
# Create upload session
upload_session = create_upload_session(access_token, onedrive_path)
upload_url = upload_session['uploadUrl']
print("Upload session created for:", onedrive_path)
# Upload the file in chunks
upload_file_in_chunks(upload_url, file_path)
print("File uploaded successfully:", onedrive_path)
Error Message:
Uploaded 3557949440 out of 91760143565 bytes (3.88%)
Traceback (most recent call last):
File "upload_files2onedrive.py", line 90, in <module>
upload_file_in_chunks(upload_url, file_path)
File "upload_files2onedrive.py", line 60, in upload_file_in_chunks
response.raise_for_status()
File "/home/user/miniconda3/envs/env_env/lib/python3.8/site-packages/requests/models.py", line 1024, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 503 Server Error: Service Unavailable for url: https://api.onedrive.com/rup/632894ca32b6d7e2/eyJSZXNvdXJjZUlEIjoiNjMyODk0Q0EzMkI2RDdFMiE5MDEzNSIsIlJlbGF0aW9uc2hpcE5hbWUiOiJlbnZpc2FnZV9pbWFnZXMuemlwIn0/4m4Iel9zAJ-DTK4Z-0XgqJCBoqVhqzSJR9n30g-yXX02o5g3VqRn2v-CeLr_oji09p46RSUax9XjUOre2BbZ41fTy0jBiBo8rNYfXYUZDTMP4/eyJuYW1lIjoiZW52aXNhZ2VfaW1hZ2VzLnppcCIsIkBuYW1lLmNvbmZsaWN0QmVoYXZpb3IiOiJyZXBsYWNlIn0/4wx5ZnNJQxqvpkLixCL_EdPL39dXS5XQgOIIZnYRFzpPreu5Gudl15YUMU1uuPJiTcnwVogDXD65HSaMdMCk54IhZ2fDTjZEu5x-kTrSONZbNfj57Ein68mli6qMgTazoQv5BDO3MyAel6o_lvmXabSpJauoaGAPNBBvUlRY1FDtk9yOd1f-XgUHSIxc1KJnAUwgRknFQvm7D7a_wtiBOikmibOA3k1nMAeiBHCQVQX96ls9RIoJCcvd8wOkszlcM2E9uRfV9PVn_V4LNI-RwPmOrf-GK7DCT7AK2mo5RiQpP1xlR25msik4M8f7MqusuwNuCXZXWfFi6YJqTF2i7xkG7VJkyjqZOuvPSJlkw74iE_DDvD_0cgtV0jNFt17GCO4_8yulHbhIA8TzOfduoVA_oy2P4ZohXxNj98xrlnW34G9QA4xh6BJIQxSw83tazaTE5nXsYzYpMvblK3ryZJu6qCPc2hQmreb7BZHtrg28MUAELAFJpvURntZr-qTNCBg8EJcwRGT9xNxDUGUO6lOcxE2FUrNG7DnzMIEjS8_t1RLUdQ6EsGOvwAr8o-SBaQ
The error occurs intermittently, suggesting it might be due to server availability or throttling issues. Is there a way to handle these errors more effectively, or is there a recommended practice for handling such large file uploads to OneDrive through the Graph API?
Recognized by Microsoft Azure Collective