Thiết kế website giá rẻ

Question

I have some code in Python which, very briefly explained:

Reads .CSV files and converts them to .xlsx files, which are paired by date-suffixes. So, there are two files for each date, one called “FLIS YYYY-MM-DD” and one called “SEI2 YYYY-MM-DD”
The pairs of .xlsx files are compared to find any differences between the pairs.
Data is contained in 5 columns
- An example from a pair of files, after conversion etc., could look like this
  File 1:
  
  File2:
The code checks if there are any differences between the files. It checks values in row 3 in file 1 against row 4 in file 2 – which is for some reason just the way i could get it to work. If any differences are found between the files, the code tells me so.
In the example above, the values in D3 + D4 in file 1 add up to the value in D4 in file 2. I need the code to add the values in row 3, with the values in row 4, as it is the sum of these values (A3 + A4, B3 + B4 etc.) that is important to me.

The current code looks like this (although changed a little for the purposes of posting here):

import pandas as pd
import os
import re
from datetime import date
from xlsxwriter.workbook import Workbook

# Path
directory = "#The path of the files"

# Expression to find date suffix in CSV for reading files
pattern = r'(d{4}-d{2}-d{2})'

# Dict of ID's
number_to_name = {
    # List removed to shorten post
}

# Function for translating ID
def translate_number(number):
    return number_to_name.get(number, f"Unknown_{number}")

# Function to compare pairs of Excel files
def compare_excel_files(file1, file2, skiprows1=None, skiprows2=None):
    # Read the Excel files
    df1 = pd.read_excel(file1, skiprows=skiprows1)
    df2 = pd.read_excel(file2, skiprows=skiprows2)

    # Compare their dataframes
    differences = df1.compare(df2)

    if not differences.empty:
        # Display which file pairs contain differences
        filename1 = os.path.basename(file1)
        filename2 = os.path.basename(file2)
        print(f"Differences found between {filename1} and {filename2} :")
        print(differences)
        print()  # Add newline between each for clarity
        return True
    return False

# Function to find missing IDs in an Excel sheet
def find_missing_IDs(file_path):
    # Read the Excel file
    df = pd.read_excel(file_path)

    # Find unique ID names in the Excel sheet
    present_IDs = set(df.iloc[-1, :])

    # Find missing IDs
    missing_IDs = set(number_to_name.values()) - present_IDs

    return missing_IDs

# Flag to track if there are differences in the files
differences_found = False

# Go through each file in the directory
for filename in os.listdir(directory):
    if filename.endswith(".csv"):
        # Find date from CSV filename
        match = re.search(pattern, filename)
        if match:
            date_suffix = match.group(1)
        # Skip files if filename does not match the desired format
        else:
            print(f"Skipping file {filename} as it does not have a valid date suffix.")
            continue

        # Read data from CSV file
        df = pd.read_csv(os.path.join(directory, filename), header=None)

        # Split data in the first column out to the necessary number of columns
        max_commas = df[0].str.split(',').transform(len).max()
        df[[f'name_{x}' for x in range(max_commas)]] = df[0].str.split(',', expand=True)

        # Remove the original unnamed column
        df.drop(0, axis=1, inplace=True)

        # Remove last column (name_0 - column only shows report time)
        df = df[df.columns.drop(list(df.filter(regex='name_0')))]

        # Find SOR number
        ids_row = df.iloc[0]

        # Translate SOR number to ID name
        translated_ids_row = ids_row.apply(translate_number)

        # Create new dataframe with row for ID names
        translated_df = pd.DataFrame([translated_ids_row.tolist()], columns=df.columns)

        # Insert new dataframe onto the original dataframe (new row at the bottom with ID name)
        df = pd.concat([df, translated_df], ignore_index=True)

        # Save the new dataframe as an Excel file
        try:
            # Pull filename without path name
            filename_only = os.path.basename(filename)
            output_name = f"{filename_only.split('_')[2].split('-')[0]} {date_suffix}.xlsx"
            output_path = os.path.join(directory, output_name)
            df.to_excel(output_path, index=None)

            # Check if there is a corresponding file to compare with (date pair), and if there are differences, set differences_found to True
            if "FLIS" in filename:
                sei2_filename = f"SEI2 {date_suffix}.xlsx"
                sei2_file = os.path.join(directory, sei2_filename)
                if os.path.exists(sei2_file):
                    if compare_excel_files(output_path, sei2_file, skiprows1=[2], skiprows2=[3]):
                        differences_found = True

            elif "SEI2" in filename:
                flis_filename = f"FLIS {date_suffix}.xlsx"
                flis_file = os.path.join(directory, flis_filename)
                if os.path.exists(flis_file):
                    if compare_excel_files(output_path, flis_file, skiprows1=[3], skiprows2=[2]):
                        differences_found = True

        # Print message if there is an error when trying to save
        except Exception as e:
            print(f"There was an error when I tried to save: {e}")

# If there are no differences in date-paired files
if not differences_found:
    print("No differences found in any of the files.")
    print()

# Function to ask the user to show missing IDs
def ask_show_missing_IDs():
    while True:
        answer = input("Would you like a list of which IDs did not appear in the data on the different days? (Yes/No): ").strip().lower()
        print()
        if answer in ["yes", "no"]:
            return answer == "yes"
        else:
            print("Please answer with 'Yes' or 'No'.")

# Function to extract the date from the filename
def extract_date(filename):
    # Remove any path and file type name, and then split by space and '-' to get the date
    return filename.split()[-1].split('.')[0]

# Ask the user if they want to see missing IDs
show_missing_IDs = ask_show_missing_IDs()

# Show missing IDs if the user wants it
if show_missing_IDs:
    missing_IDs_found = False
    for filename in os.listdir(directory):
        if filename.endswith(".xlsx") and filename.startswith("FLIS"):
            output_path = os.path.join(directory, filename)
            missing_IDs = find_missing_IDs(output_path)
            if missing_IDs:
                if not missing_IDs_found:
                    print("Here are the missing IDs for each of the following dates:")
                    missing_IDs_found = True
                # Sort the IDs in alphabetical order before printing
                sorted_missing_IDs = sorted(missing_IDs)
                # Only print the date from the filename without "FLIS"
                print(f"{extract_date(filename)}:")
                for ID in sorted_missing_IDs:
                    print(ID)
                print()
    if not missing_IDs_found:
        print("No missing IDs found in any of the files.")

# Function to ask the user to delete the files
def ask_delete_files():
    while True:
        answer = input("Should the CSV and Excel files be automatically deleted? (Yes/No): ").strip().lower()
        print()
        if answer in ["yes", "no"]:
            return answer == "yes"
        else:
            print("Please answer with 'Yes' or 'No'.")

# Ask the user if they want to delete the files after processing
delete_files = ask_delete_files()

# Delete the files if the user wants it
if delete_files:
    for filename in os.listdir(directory):
        if filename.endswith((".csv", ".xlsx")):
            os.remove(os.path.join(directory, filename))
    print("The files have been deleted.")
    print()

# Wait for user input before window closes
input("Press Enter to exit")

I have tried a variety of solutions from ChatGPT and Copilot, as i am no way good at coding – but nothing seems to work. Some of the solutions can sum the values and print it in the output, but i can not get it to add the summed values, which i need for the comparison of files to pass.

Thiết kế website giá rẻ

Danh mục

Python: How to sum values in Excel sheet and save value to new row