Is there a difference between the following two implementations in terms of computational performance and memory consumption?
The only difference is whether the intermediate bias-corrected value is stored in a separate variable (S_corrected) before it is used.
1)
def step(self, data_deriv: npt.NDArray) -> npt.NDArray:
self.t += 1
self.S = self.beta * self.S + (1 - self.beta) * data_deriv ** 2
return data_deriv / np.sqrt((self.S / (1 - self.beta ** self.t)) + self.epsilon)
and
2)
def step(self, data_deriv: npt.NDArray) -> npt.NDArray:
self.t += 1
self.S = self.beta * self.S + (1 - self.beta) * data_deriv ** 2
S_corrected = (self.S / (1 - self.beta ** self.t))
return data_deriv / np.sqrt(S_corrected + self.epsilon)