My script I’m running in python is below. The script pulls all orders from a shopify store and adds them into a table in my excel. The script is working correctly however after running it and opening back my excel I get an error stating, “We found a problem with some content in ‘Analytics.xlsx’. Do you want us to try to recover as much as we can? If you trust this workbook, click Yes.”.
I click yes and each time it then deletes all my power queries and any external connections in my excel. My power queries do not have any reference to the table I am adding rows to through my script. If I make a copy of the excel and delete my power queries and external connections first I do not get any error either.
I’m running out of ideas and do not know why it keeps breaking the connections. Any help is appreciated.
import os
import requests
import pandas as pd
import sys
from time import sleep
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.worksheet.table import Table
from openpyxl.utils import get_column_letter
# Shopify API credentials and initial URL
SHOP_NAME = 'xyz'
ACCESS_TOKEN = 'token'
BASE_URL = f"https://{SHOP_NAME}.myshopify.com/admin/api/2024-04/orders.json"
headers = {
"X-Shopify-Access-Token": ACCESS_TOKEN,
"Content-Type": "application/json"
}
def fetch_orders_since(last_date):
orders = []
url = BASE_URL + f"?status=any&limit=250&created_at_min={last_date}"
while url:
response = requests.get(url, headers=headers)
response.raise_for_status()
data = response.json()
orders.extend(data['orders'])
link_header = response.headers.get('link')
next_url = None
if link_header:
links = link_header.split(',')
for link in links:
if 'rel="next"' in link:
next_url = link.split(';')[0].strip()[1:-1]
break
url = next_url
return orders
def fetch_metafields(order_id):
metafields_url = f"https://{SHOP_NAME}.myshopify.com/admin/api/2024-04/orders/{order_id}/metafields.json"
response = requests.get(metafields_url, headers=headers)
response.raise_for_status()
return response.json().get('metafields', [])
def add_metafields_to_orders(orders):
for order in orders:
order_id = order['id']
metafields = fetch_metafields(order_id)
order['metafields'] = metafields
sleep(0.5)
return orders
def convert_type(value, target_type):
if target_type == 'str':
return str(value)
elif target_type == 'int':
try:
return int(value)
except ValueError:
return 0
elif target_type == 'float':
try:
return float(value)
except ValueError:
return 0.0
elif target_type == 'bool':
return bool(value)
elif target_type == 'datetime':
return pd.to_datetime(value, errors='coerce')
else:
return value
def get_column_types(ws, table):
column_types = []
table_range = table.ref
start_cell, end_cell = table_range.split(':')
start_row = ws[start_cell].row
headers = [cell.value for cell in ws[start_row]]
for column_name in headers:
column_letter = ws[start_row][headers.index(column_name)].column_letter
cell_value = ws[f"{column_letter}{start_row + 1}"].value # Assuming data starts from the row below headers
column_types.append(type(cell_value).__name__)
return column_types
def append_orders_to_excel(orders, file_path, sheet_name='Shopify Orders', table_name='orders'):
df = pd.json_normalize(orders)
wb = load_workbook(file_path)
ws = wb[sheet_name]
table = ws.tables.get(table_name)
if table is None:
raise ValueError(f"Table {table_name} not found in sheet {sheet_name}")
column_types = get_column_types(ws, table)
if not df.empty:
for r in dataframe_to_rows(df, index=False, header=False):
converted_row = [convert_type(value, column_types[idx]) for idx, value in enumerate(r)]
ws.append(converted_row)
# Update the table range to include new rows
start_cell = table.ref.split(':')[0]
end_cell = f"{get_column_letter(ws.max_column)}{ws.max_row}"
table.ref = f"{start_cell}:{end_cell}"
wb.save(file_path)
if __name__ == "__main__":
last_date = sys.argv[1] if len(sys.argv) > 1 else '2024-05-26T04:00:00'
file_path = r'C:pathtoAnalytics.xlsx'
orders = fetch_orders_since(last_date)
orders_with_metafields = add_metafields_to_orders(orders)
append_orders_to_excel(orders_with_metafields, file_path)
print("Orders have been successfully appended to the Excel file")