I’m computing maximum likelihood estimates (MLE) for a beta distribution over 5000 simulated samples of 100 data points each. However, my current implementation takes a long time to run.
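For context, the functions below implement Newton-Raphson on the beta log-likelihood averaged over the n points of a sample,

\ell(a, b) = (a - 1)\,\overline{\log x} + (b - 1)\,\overline{\log(1 - x)} - \log B(a, b),

whose gradient (the score) is

\partial_a \ell = \overline{\log x} - \psi(a) + \psi(a + b), \qquad \partial_b \ell = \overline{\log(1 - x)} - \psi(b) + \psi(a + b),

with \psi the digamma function; U_score and Hessiana below are this gradient and its Hessian.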
This is the code I’m using:
import numpy as np
import scipy.special as sp
import scipy.stats as st

def U_score(x, theta):
    a = theta[0]
    b = theta[1]
    n = len(x)
    epsilon = 1e-10  # to avoid log(0)
    # score of the per-observation beta log-likelihood
    d_a = -sp.digamma(a) + sp.digamma(a + b) + np.sum(np.log(x + epsilon)) / n
    d_b = -sp.digamma(b) + sp.digamma(a + b) + np.sum(np.log(1 - x + epsilon)) / n
    return np.array((d_a, d_b))
def Hessiana(x, theta):
    a = theta[0]
    b = theta[1]
    # Hessian of the per-observation log-likelihood (x is unused here)
    h11 = sp.polygamma(1, a + b) - sp.polygamma(1, a)
    h12 = sp.polygamma(1, a + b)
    h21 = sp.polygamma(1, a + b)
    h22 = sp.polygamma(1, a + b) - sp.polygamma(1, b)
    return np.array([[h11, h12], [h21, h22]])
def H_inv(x, theta):
    H = Hessiana(x, theta)
    ridge = 1e-6  # small constant
    H_ridge = H + ridge * np.eye(H.shape[0])
    return np.linalg.inv(H_ridge)
def max_likelihood(x, theta, tol):
    n_iter = 0
    while n_iter < 1000:
        # Newton-Raphson step
        theta_new = theta - H_inv(x, theta) @ U_score(x, theta)
        if np.linalg.norm(theta_new - theta) <= tol:
            return theta_new, n_iter + 1
        theta = theta_new
        n_iter += 1
    print('Did not converge!')
    return theta_new, n_iter
def mse(arr, theta_real):
    a = theta_real[0]
    b = theta_real[1]
    eqm_a = np.mean((arr[:, 0] - a)**2)
    eqm_b = np.mean((arr[:, 1] - b)**2)
    return eqm_a, eqm_b
arr = np.array([])
n_amostras = 5000
theta = np.array([1, 1])
for i in range(n_amostras):
    x = st.beta.rvs(2, 5, size=100)
    emv, iteracoes = max_likelihood(x, theta, 1e-6)
    arr = np.append(arr, emv)
arr = arr.reshape(-1, 2)
print(arr)
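(For reference, mse is intended to be called on the stacked estimates with the true parameters, i.e. the (2, 5) passed to beta.rvs; something like:

eqm_a, eqm_b = mse(arr, np.array([2, 5]))
print(eqm_a, eqm_b)

)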
This last part, the estimation loop itself, is the problem:
arr = np.array([])
n_amostras = 5000
theta = np.array([1, 1])
for i in range(n_amostras):
    x = st.beta.rvs(2, 5, size=100)
    emv, iteracoes = max_likelihood(x, theta, 1e-6)
    arr = np.append(arr, emv)
arr = arr.reshape(-1, 2)
print(arr)
I’d appreciate any suggestions on how to optimize this process and calculate the Mean Squared Error (MSE) more efficiently. Thank you!
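For concreteness, here is a sketch of the kind of vectorized rewrite I have in mind (the name max_likelihood_vec is mine, and I haven't verified it beyond the basic logic): since sp.digamma and sp.polygamma broadcast over arrays, the Newton iteration can run on all 5000 samples at once, the statistics mean(log x) and mean(log(1-x)) can be computed once per sample instead of once per iteration, and the 2x2 Hessian can be inverted in closed form instead of calling np.linalg.inv 5000 times:

import numpy as np
import scipy.special as sp
import scipy.stats as st

def max_likelihood_vec(X, theta0, tol=1e-6, max_iter=1000):
    """Newton-Raphson on every row of X (one sample per row) simultaneously."""
    m = X.shape[0]
    theta = np.tile(np.asarray(theta0, dtype=float), (m, 1))  # shape (m, 2)
    eps = 1e-10
    # sufficient statistics: computed once per sample, not once per iteration
    mean_log_x = np.log(X + eps).mean(axis=1)
    mean_log_1mx = np.log(1 - X + eps).mean(axis=1)
    for _ in range(max_iter):
        a, b = theta[:, 0], theta[:, 1]
        # same score as U_score, evaluated for all samples at once
        d_a = -sp.digamma(a) + sp.digamma(a + b) + mean_log_x
        d_b = -sp.digamma(b) + sp.digamma(a + b) + mean_log_1mx
        # same Hessian entries as Hessiana, with the ridge on the diagonal
        h12 = sp.polygamma(1, a + b)
        h11 = h12 - sp.polygamma(1, a) + 1e-6
        h22 = h12 - sp.polygamma(1, b) + 1e-6
        # closed-form 2x2 inverse: H^-1 = [[h22, -h12], [-h12, h11]] / det
        det = h11 * h22 - h12 * h12
        step_a = (h22 * d_a - h12 * d_b) / det
        step_b = (h11 * d_b - h12 * d_a) / det
        theta_new = theta - np.column_stack((step_a, step_b))
        converged = np.linalg.norm(theta_new - theta, axis=1) <= tol
        theta = theta_new
        if converged.all():
            break
    return theta

X = st.beta.rvs(2, 5, size=(5000, 100))
estimates = max_likelihood_vec(X, [1.0, 1.0])
eqm = np.mean((estimates - np.array([2, 5]))**2, axis=0)  # MSE for a and b in one line
print(eqm)

Even without vectorizing, preallocating with arr = np.empty((n_amostras, 2)) and assigning arr[i] = emv would avoid the repeated copying that np.append does on every iteration.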