I do not know whether this problem has already been posted somewhere else; if it has, please direct me to it. Thanks.
I have some data that looks like this
I want to calculate the area under the curve. However, because the initial and end points do not go to zero, I would like to fit a linear regression to the data at the start and at the end, and then calculate the area under the data that is confined by those linear regressions, something like this:
I want to choose the data used for the linear regressions and then calculate the area under the data that is confined by the regression lines, integrating from the last point of the pre-baseline regression to the first point of the post-baseline regression.
I have played around a bit with ChatGPT to get some code that can do what I want, but it does not seem to understand that I only want the area between the linear regressions and the data:
def baseline_correction(data, indices):
    """Fit one straight-line baseline through selected rows of ``data``.

    Args:
        data: 2-D NumPy array; column 0 is used as x and column 1 as y.
        indices: Integer row indices (list or array) of the points to fit.

    Returns:
        Tuple ``(slope, intercept)`` of the least-squares line through the
        selected points.

    Raises:
        ValueError: If fewer than two points are selected, since a line
            cannot be fitted to them.
    """
    x = data[indices, 0]
    y = data[indices, 1]
    if x.size < 2:
        raise ValueError(
            "baseline_correction needs at least two points to fit a line, "
            f"got {x.size}"
        )
    # Use the named attributes rather than unpacking a fixed-length tuple.
    fit = linregress(x, y)
    return fit.slope, fit.intercept
def calculate_area(data, start_index, end_index, slope, intercept):
    """Integrate the baseline-subtracted signal over a slice of ``data``.

    The baseline ``slope * x + intercept`` is subtracted from column 1 and the
    result is integrated against column 0 with the trapezoidal rule.

    Args:
        data: 2-D NumPy array; column 0 is x, column 1 is y.
        start_index: First row of the integration range (inclusive).
        end_index: End row of the integration range (exclusive, as in a
            normal Python slice).
        slope: Slope of the linear baseline.
        intercept: Intercept of the linear baseline.

    Returns:
        Tuple ``(charge, corrected_current)``: the integrated area and the
        baseline-subtracted y values for rows ``start_index:end_index``.
    """
    # NumPy 2.0 renamed ``trapz`` to ``trapezoid``; importing locally with a
    # fallback keeps this function working on both old and new versions.
    try:
        from numpy import trapezoid
    except ImportError:  # NumPy < 2.0
        from numpy import trapz as trapezoid

    potential = data[start_index:end_index, 0]
    current = data[start_index:end_index, 1]

    # Correct the current by subtracting the baseline (ASCII minus; the
    # original en-dash character was a syntax error).
    corrected_current = current - (potential * slope + intercept)

    # Area between the data and the baseline.
    charge = trapezoid(corrected_current, x=potential)
    return charge, corrected_current
def _plot_cycle(data, cycle_number, start_index, end_index, baseline_indices,
                peak_start_idx, peak_end_idx, slope, intercept,
                corrected_current):
    """Plot one cycle: raw data, baseline fit points, baseline and corrected current."""
    peak_potential = data[peak_start_idx:peak_end_idx, 0]
    baseline_curve = peak_potential * slope + intercept

    plt.figure(figsize=(10, 6))
    plt.plot(data[start_index:end_index, 0], data[start_index:end_index, 1],
             label='Original Data', color='gray')
    plt.plot(data[baseline_indices, 0], data[baseline_indices, 1], 'o',
             label='Baseline Points', color='red')
    plt.plot(peak_potential, baseline_curve, label='Baseline', color='green')
    plt.plot(peak_potential, corrected_current, label='Corrected Current',
             color='purple')
    plt.fill_between(peak_potential, 0, corrected_current, color='purple',
                     alpha=0.3, label='Area Under Curve')
    plt.xlabel('Potential (V)')
    plt.ylabel('Current (A)')
    plt.title(f'Cycle {cycle_number} Charge Calculation')
    plt.legend()
    plt.grid(True)
    plt.show()


def process_cycles(data, cycle_starts, *, pre_window=(0.35, 0.4),
                   post_window=(0.525, 0.55), plot=True):
    """Compute the baseline-corrected peak area for every cycle in ``data``.

    For each pair of consecutive entries in ``cycle_starts`` the function:

    1. keeps only the points of that cycle where column 0 is increasing,
    2. fits one straight baseline through the points lying in ``pre_window``
       and ``post_window``,
    3. integrates (data - baseline) from the end of ``pre_window`` to the
       start of ``post_window``.

    Args:
        data: 2-D NumPy array; column 0 is the potential, column 1 the current.
        cycle_starts: Row indices at which each cycle begins; cycle ``i`` spans
            ``cycle_starts[i]:cycle_starts[i + 1]``.
        pre_window: ``(low, high)`` potential range of the pre-peak baseline
            points. Defaults to the values originally hard-coded.
        post_window: ``(low, high)`` potential range of the post-peak baseline
            points. Defaults to the values originally hard-coded.
        plot: When true (default, the original behaviour) show one figure per
            cycle.

    Returns:
        List with one integrated area ("charge") per cycle.

    Raises:
        ValueError: If a cycle has no increasing-sweep points between the two
            baseline windows. The baseline fit may also raise if a cycle has
            no usable baseline points.
    """
    pre_low, pre_high = pre_window
    post_low, post_high = post_window

    charges = []
    cycle_bounds = zip(cycle_starts[:-1], cycle_starts[1:])
    for cycle_number, (start_index, end_index) in enumerate(cycle_bounds, start=1):
        # Selecting only the increasing potential part of the cycle
        cycle_potential = data[start_index:end_index, 0]
        increasing_potential_indices = (
            np.where(np.diff(cycle_potential) > 0)[0] + start_index
        )
        sweep_potential = data[increasing_potential_indices, 0]

        # Combine pre-peak and post-peak data points for baseline
        pre_peak_indices = increasing_potential_indices[
            (sweep_potential >= pre_low) & (sweep_potential <= pre_high)
        ]
        post_peak_indices = increasing_potential_indices[
            (sweep_potential >= post_low) & (sweep_potential <= post_high)
        ]
        combined_indices = np.concatenate((pre_peak_indices, post_peak_indices))

        # Calculate unified baseline
        slope, intercept = baseline_correction(data, combined_indices)

        # Peak region: from the end of the pre-peak window to the start of the
        # post-peak window, searched only within THIS cycle's increasing sweep.
        # (Searching data[:, 0] globally made every cycle integrate the same
        # range spanning the whole data set.)
        peak_indices = increasing_potential_indices[
            (sweep_potential >= pre_high) & (sweep_potential <= post_low)
        ]
        if peak_indices.size == 0:
            raise ValueError(
                f"Cycle {cycle_number}: no increasing-sweep points with "
                f"potential between {pre_high} and {post_low}"
            )
        peak_start_idx = int(peak_indices.min())
        # calculate_area slices start:end (end exclusive), so go one past the
        # last peak point to include it.
        # NOTE(review): assumes the increasing sweep is one contiguous run of
        # rows within the cycle - confirm for data whose cycles start mid-sweep.
        peak_end_idx = int(peak_indices.max()) + 1

        # Calculate the charge
        charge, corrected_current = calculate_area(
            data, peak_start_idx, peak_end_idx, slope, intercept
        )
        charges.append(charge)

        # Visualization
        if plot:
            _plot_cycle(data, cycle_number, start_index, end_index,
                        combined_indices, peak_start_idx, peak_end_idx,
                        slope, intercept, corrected_current)

    return charges