import numpy as np
import math
import matplotlib.pyplot as plt
from scipy.io import wavfile
from scipy.signal import resample
from scipy.signal._peak_finding_utils import _local_maxima_1d
def select_by_peak_distance(peaks, priority, distance):
    """
    Evaluate which peaks fulfill the distance condition.

    Peaks are visited from highest to lowest `priority`. Every peak that is
    still kept when visited discards all neighbours that lie closer than
    `distance` samples to it.

    Parameters
    ----------
    peaks : array_like
        Sample indices of the peaks. Must be one-dimensional and sorted in
        ascending order, because the neighbour scan walks outwards from a
        peak and stops at the first neighbour that is far enough away.
    priority : array_like
        An array matching `peaks` used to determine priority of each peak. A
        peak with a higher priority value is kept over one with a lower one.
        Among peaks of equal priority the one at the later position wins.
    distance : float
        Minimal distance that peaks must be spaced. Rounded up to the next
        integer.

    Returns
    -------
    keep : ndarray[bool]
        A boolean mask evaluating to true where `peaks` fulfill the distance
        condition.

    Raises
    ------
    ValueError
        If `peaks` is not one-dimensional or `priority` has another shape.
    """
    peaks = np.asarray(peaks)
    priority = np.asarray(priority)
    if peaks.ndim != 1 or priority.shape != peaks.shape:
        raise ValueError(
            "`peaks` and `priority` must be 1-D arrays of equal length"
        )

    peaks_size = peaks.shape[0]
    # Round up because actual peak distance can only be natural number
    distance = math.ceil(distance)
    keep = np.ones(peaks_size, dtype=np.bool_)  # Prepare array of flags

    # Map from rank by `priority` to index by position. A stable sort makes
    # the handling of equal priorities independent of the sort algorithm.
    priority_to_position = np.argsort(priority, kind="stable")

    # Walk from the highest priority down to the lowest one.
    for j in priority_to_position[::-1]:
        if not keep[j]:
            # Skip evaluation for peak already marked as "don't keep"
            continue

        # Flag "earlier" peaks for removal until minimal distance is exceeded
        k = j - 1
        while k >= 0 and peaks[j] - peaks[k] < distance:
            keep[k] = False
            k -= 1

        # Flag "later" peaks for removal until minimal distance is exceeded
        k = j + 1
        while k < peaks_size and peaks[k] - peaks[j] < distance:
            keep[k] = False
            k += 1

    return keep
# Minimal spacing (in samples) enforced between retained extrema.
MIN_PEAK_DISTANCE = 20

# Load the recording; the sample rate `sr` is read but not used below.
sr, x = wavfile.read("jfk.wav")
# Normalize (-1.0, 1.0); the divisor assumes 16-bit PCM samples - TODO confirm
x = x.astype(np.float64, order='C') / 32768.0
x = resample(x, len(x) // 8)

# Local maxima that lie above zero. Boolean masking keeps the integer index
# dtype even when no peak passes the filter, so `x[peaks]` stays valid.
peaks, _, _ = _local_maxima_1d(x)
peaks = peaks[x[peaks] > 0]
keep = select_by_peak_distance(peaks, x[peaks], MIN_PEAK_DISTANCE)
peaks = peaks[keep]

# Local minima that lie below zero, found as maxima of the negated signal.
valley, _, _ = _local_maxima_1d(-x)
valley = valley[x[valley] < 0]
keep = select_by_peak_distance(valley, -x[valley], MIN_PEAK_DISTANCE)
valley = valley[keep]

# Plot the signal with the selected maxima (green) and minima (red).
fig = plt.figure(figsize=(12, 6), dpi=80)
gs = fig.add_gridspec(1, hspace=0)
axs = gs.subplots()
axs.axhline(y=0.0, color='lightgray', ls='-', lw=1.0)
axs.plot(x, lw=1)
axs.plot(peaks, x[peaks], "o", color='green')
axs.plot(valley, x[valley], "o", color='red')
fig.tight_layout()
plt.show()
plt.close()
`select_by_peak_distance` is a Python port of the corresponding Cython code.
- (G1, R1), (G2, R2), (G3, R3) pairs are the correct set of maxima and minima points.
- The rest of the maxima points are wrong because they are one index ahead of the correct position.
- Resampling produces blunt or flat peaks, and I believe that is why some peaks are not selected (see the bottom right of the images). If you increase the number of points with
x = resample(x, len(x)//4)
the missing-peaks problem is resolved, but that is not the solution I’m looking for.
- In the case of minima, the wrong point is one index before the correct position.
If we can solve the problem for the maxima, the minima will follow: the lowest dip to the left of each maximum will be the correct minimum.
I have tried a few things with the `select_by_peak_distance` function but could not get the desired result.