I’ve implemented in Python a recursive FFT function and an iterative one. They’re both correct (they both pass all unit tests) but, when I plot the complexity curves, I find that the iterative version is slower than the recursive one (the curves are both O(n lg(n)) but the iterative one is shifted up a little). Can you help me identify what, in the following code, is slowing down my iterative FFT method ? Am I using ThreadPoolExecutor wrong ?
<code>def iterative_fft(a: np.array, inverse=False) -> np.array:
"""
:param a: polynomial in coefficient form
:param inverse: direct or inverse DFT flag
:return: DFT(a) in point form
"""
n = len(a)
a = bit_reverse_copy(a)
for s in range(1, int(np.log2(n)) + 1):
m = 2 ** s
omega_m = np.exp(2 * np.pi * 1j / m)
if inverse:
omega_m **= -1
def butterfly(k):
omega = 1
for j in range(m // 2):
t = omega * a[k + j + m // 2]
u = a[k + j]
a[k + j] = u + t
a[k + j + m // 2] = u - t
omega = omega * omega_m
with concurrent.futures.ThreadPoolExecutor() as executor:
executor.map(butterfly, range(0, n, m))
if inverse:
a = a / n
return a
def bit_reverse_copy(a: np.array) -> np.array:
"""
:param a: polynomial
:return: reordered array based on index bits reverse
"""
n = len(a)
bit_size = int(np.log2(n))
reversed_a = np.zeros(n, dtype=complex)
for i in range(n):
reversed_index = reverse_bits(i, bit_size)
reversed_a[reversed_index] = a[i]
return reversed_a
def reverse_bits(n, bit_size):
"""
:param n: integer
:param bit_size: bits used for integer representation
:return: reversed bit integer
"""
reverse_n = 0
for i in range(bit_size):
reverse_n = (reverse_n << 1) | (n & 1)
n >>= 1
return reverse_n
</code>
<code>def iterative_fft(a: np.array, inverse=False) -> np.array:
"""
:param a: polynomial in coefficient form
:param inverse: direct or inverse DFT flag
:return: DFT(a) in point form
"""
n = len(a)
a = bit_reverse_copy(a)
for s in range(1, int(np.log2(n)) + 1):
m = 2 ** s
omega_m = np.exp(2 * np.pi * 1j / m)
if inverse:
omega_m **= -1
def butterfly(k):
omega = 1
for j in range(m // 2):
t = omega * a[k + j + m // 2]
u = a[k + j]
a[k + j] = u + t
a[k + j + m // 2] = u - t
omega = omega * omega_m
with concurrent.futures.ThreadPoolExecutor() as executor:
executor.map(butterfly, range(0, n, m))
if inverse:
a = a / n
return a
def bit_reverse_copy(a: np.array) -> np.array:
"""
:param a: polynomial
:return: reordered array based on index bits reverse
"""
n = len(a)
bit_size = int(np.log2(n))
reversed_a = np.zeros(n, dtype=complex)
for i in range(n):
reversed_index = reverse_bits(i, bit_size)
reversed_a[reversed_index] = a[i]
return reversed_a
def reverse_bits(n, bit_size):
"""
:param n: integer
:param bit_size: bits used for integer representation
:return: reversed bit integer
"""
reverse_n = 0
for i in range(bit_size):
reverse_n = (reverse_n << 1) | (n & 1)
n >>= 1
return reverse_n
</code>
def iterative_fft(a: np.array, inverse=False) -> np.array:
"""
:param a: polynomial in coefficient form
:param inverse: direct or inverse DFT flag
:return: DFT(a) in point form
"""
n = len(a)
a = bit_reverse_copy(a)
for s in range(1, int(np.log2(n)) + 1):
m = 2 ** s
omega_m = np.exp(2 * np.pi * 1j / m)
if inverse:
omega_m **= -1
def butterfly(k):
omega = 1
for j in range(m // 2):
t = omega * a[k + j + m // 2]
u = a[k + j]
a[k + j] = u + t
a[k + j + m // 2] = u - t
omega = omega * omega_m
with concurrent.futures.ThreadPoolExecutor() as executor:
executor.map(butterfly, range(0, n, m))
if inverse:
a = a / n
return a
def bit_reverse_copy(a: np.array) -> np.array:
"""
:param a: polynomial
:return: reordered array based on index bits reverse
"""
n = len(a)
bit_size = int(np.log2(n))
reversed_a = np.zeros(n, dtype=complex)
for i in range(n):
reversed_index = reverse_bits(i, bit_size)
reversed_a[reversed_index] = a[i]
return reversed_a
def reverse_bits(n, bit_size):
"""
:param n: integer
:param bit_size: bits used for integer representation
:return: reversed bit integer
"""
reverse_n = 0
for i in range(bit_size):
reverse_n = (reverse_n << 1) | (n & 1)
n >>= 1
return reverse_n