I have been provisioned an Azure DevBox environment. Storage containers can be accessed only from within the DevBox.
For regression testing, I want to compare the files stored in Azure storage containers against my existing production files, which are on a local drive, and list any mismatch in any of the fields.
What tools can I use to automate this?
I want this to be done automatically using any tools, and to save the mismatch details to a SharePoint site for the business.
You can use a scheduling tool such as Windows Task Scheduler
to automate the process.
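For example, you can register a daily run from a command prompt; the task name, Python path, and script path below are placeholders to adjust for your environment:

schtasks /create /tn "BlobRegressionCompare" /tr "python C:\Scripts\compare_files.py" /sc daily /st 07:00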
Here is the Python code that compares the files in the Azure storage container with those on the local drive.
Code:
import os
import pandas as pd
from azure.storage.blob import BlobServiceClient

# Azure Storage Configuration
AZURE_CONNECTION_STRING = "xxxxxx"
CONTAINER_NAME = "result"

# Local Directory Configuration
LOCAL_DIRECTORY = r"C:\Users\v-vsettu\OneDrive - Microsoft\Documents\venkat1"

def get_azure_blob_metadata(connection_string, container_name):
    """Retrieve file names and sizes from Azure Blob Storage."""
    blob_service_client = BlobServiceClient.from_connection_string(connection_string)
    container_client = blob_service_client.get_container_client(container_name)
    azure_files = {}
    for blob in container_client.list_blobs():
        azure_files[blob.name] = blob.size  # Store name and size
    return azure_files

def get_local_file_metadata(local_directory):
    """Retrieve file names and sizes from a local directory."""
    local_files = {}
    for file in os.listdir(local_directory):
        file_path = os.path.join(local_directory, file)
        if os.path.isfile(file_path):
            local_files[file] = os.path.getsize(file_path)  # Store name and size
    return local_files

def compare_files(local_files, azure_files):
    """Compare file names and sizes between local and Azure."""
    mismatches = []
    # Compare local files with Azure blobs
    for file_name, file_size in local_files.items():
        azure_size = azure_files.get(file_name)
        if azure_size is None:
            mismatches.append(f"File '{file_name}' is missing in Azure Storage.")
        elif azure_size != file_size:
            mismatches.append(f"Size mismatch for '{file_name}': Local={file_size}, Azure={azure_size}")
    # Compare Azure files with local files
    for file_name in azure_files:
        if file_name not in local_files:
            mismatches.append(f"File '{file_name}' is missing in local directory.")
    return mismatches

def save_mismatch_report(mismatches):
    """Save the mismatch report as a CSV file."""
    report_filename = "MismatchReport.csv"
    df = pd.DataFrame(mismatches, columns=["Mismatch Details"])
    df.to_csv(report_filename, index=False)
    return report_filename

def main():
    azure_files = get_azure_blob_metadata(AZURE_CONNECTION_STRING, CONTAINER_NAME)
    local_files = get_local_file_metadata(LOCAL_DIRECTORY)
    mismatches = compare_files(local_files, azure_files)
    if mismatches:
        report_filename = save_mismatch_report(mismatches)
        print(f"Mismatch report generated: {report_filename}")
    else:
        print("No mismatches found.")

if __name__ == "__main__":
    main()
Output:
If a mismatch is found, it will be saved as a CSV file on the local drive.
Mismatch report generated: MismatchReport.csv
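Note that the script above compares only file names and sizes. To catch mismatches in individual fields inside the files, as the question asks, here is a minimal sketch, assuming both copies are CSV files with identical columns and row order (compare_csv_fields is an illustrative helper name, not part of the script above):

import io
import pandas as pd
from azure.storage.blob import BlobServiceClient

def compare_csv_fields(connection_string, container_name, blob_name, local_path):
    """Return only the cells that differ between the Azure copy and the local copy."""
    service = BlobServiceClient.from_connection_string(connection_string)
    # Download the blob into memory and load both copies as DataFrames
    blob_bytes = service.get_container_client(container_name).download_blob(blob_name).readall()
    azure_df = pd.read_csv(io.BytesIO(blob_bytes))
    local_df = pd.read_csv(local_path)
    if local_df.shape != azure_df.shape:
        raise ValueError(f"Shape mismatch for '{blob_name}': local={local_df.shape}, azure={azure_df.shape}")
    # DataFrame.compare keeps only the differing cells ("self" = local, "other" = Azure)
    return local_df.compare(azure_df)

The returned DataFrame can be appended to the mismatch report before it is saved.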
You can refer to this SO thread by Sridevi on how to upload the CSV file from the local drive to SharePoint using Python.
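For example, a minimal sketch using the Office365-REST-Python-Client package (the site URL, credentials, and library path below are placeholders, and the upload_file signature can vary between package versions):

from office365.runtime.auth.user_credential import UserCredential
from office365.sharepoint.client_context import ClientContext

SITE_URL = "https://yourtenant.sharepoint.com/sites/yoursite"  # placeholder site URL
FOLDER_URL = "Shared Documents"                                # placeholder document library

# Authenticate with user credentials (an app registration is preferable for unattended runs)
ctx = ClientContext(SITE_URL).with_credentials(UserCredential("user@yourtenant.com", "xxxxxx"))
target_folder = ctx.web.get_folder_by_server_relative_url(FOLDER_URL)
with open("MismatchReport.csv", "rb") as f:
    target_folder.upload_file("MismatchReport.csv", f.read()).execute_query()
print("MismatchReport.csv uploaded to SharePoint.")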
Reference:
How to Automate Python Scripts with Task Scheduler (Windows example) – JC Chouinard