My problem is that in my newly created file the data is a mess. Therefore I want to ask what I did wrong in my code. It should look like this:
737224975,69450.10000000,0.00002000,1.38900200,1716854400003,False,True
737224976,69450.10000000,0.00010000,6.94501000,1716854400003,False,True
737224977,69450.10000000,0.00010000,6.94501000,1716854400003,False,True
But for some reason it looks like this:
735704412,69065.44000000,0.00291000,200.98043040,1716819665729,False,True,737224975,69450.10000000,0.00002000,1.38900200,1716854400003
735704413.0,69065.47,0.02,1381.3094,1716819665839.0,True,True,,,,,
735704414.0,69065.46,0.0001,6.906546,1716819665839.0,True,True,,,,,
735704415.0,69065.46,0.03294,2275.0162524,1716819665839.0,True,True,,,,,
735704416.0,69065.44,0.07247,5005.1724368,1716819665839.0,True,True,,,,,
735704417.0,69065.42,0.0001,6.906542,1716819665839.0,True,True,,,,,
735704418.0,69065.24,0.0001,6.906524,1716819665839.0,True,True,,,,,
735704419.0,69064.68,0.02,1381.2936,1716819665844.0,True,True,,,,,
735704420.0,69064.68,0.02588,1787.3939184,1716819665844.0,True,True,,,,,
735704421.0,69064.55,0.0001,6.906455,1716819665844.0,True,True,,,,,
,,,,,False,True,737224976.0,69450.1,0.0001,6.94501,1716854400003.0
This is my python file, which sorts my files after their date and then merges those in sequence and creates a new file:
import os
import re
import pandas as pd
# Define the directory where your CSV files are located
current_directory = os.getcwd()
print(current_directory)
# Construct the path to the "data" folder in the parent directory
directory = os.path.join(current_directory, "staticdata")
print(directory)
# Define the naming scheme pattern
pattern = re.compile(r'BTCFDUSD-trades-(d{4}-d{2}-d{2}).csv')
# Function to extract date from filename
def extract_date(filename):
match = pattern.search(filename)
if match:
return match.group(1)
else:
return None
# Get list of CSV files in the directory
csv_files = [f for f in os.listdir(directory) if f.endswith('.csv')]
# Sort the files based on date
csv_files_sorted = sorted(csv_files, key=lambda x: extract_date(x))
# Process the files in sorted order
for filename in csv_files_sorted:
# Your processing logic goes here, for example:
# with open(os.path.join(directory, filename), 'r') as file:
# data = file.read()
print(filename)
# Check if there are any files to process
if not csv_files_sorted:
print("No CSV files found to process.")
else:
# Read and concatenate the CSV files in sorted order
merged_data = pd.DataFrame()
for filename in csv_files_sorted:
file_path = os.path.join(directory, filename)
df = pd.read_csv(file_path)
merged_data = pd.concat([merged_data, df], ignore_index=True)
# Extract the first and last date for the new file name
first_date = extract_date(csv_files_sorted[0])
last_date = extract_date(csv_files_sorted[-1])
# Define the new filename
new_filename = f"BTCFDUSD-trades_{first_date}_to_{last_date}.csv"
new_file_path = os.path.join(directory, new_filename)
# Save the merged dataframe to the new CSV file
merged_data.to_csv(new_file_path, index=False)
print(f"Merged file saved as: {new_file_path}")
Changing the method of merging meaning concat() and append().