I perform Bayesian inference on a mixture model in which μ is the mixture weight of a feature:
p(x | μ, theta) = μ p_feature(x | theta) + (1 - μ) p_nofeature(x | theta).
I plot T(μ) = log( p(x | μ, theta(μ)) / p(x | μ*, theta*) ), where x is fixed,
theta(μ) = arg max_theta p(x | μ, theta)
μ*, theta* = arg max_{μ, theta} p(x | μ, theta)
I do not have access to the likelihood, so I find the two solutions above by maximizing the ratio
p(μ, theta | x) / p(μ, theta) = p(x | μ, theta) / p(x),
which is proportional to the likelihood because p(x) does not depend on (μ, theta).
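In log space this ratio is just the difference of the posterior and prior log-densities. As a minimal illustration of the idea (the prior and posterior objects are assumed to expose a `.log_prob` method, as torch.distributions and sbi posteriors do; the names here are illustrative, not the exact objects used below):

```python
import torch

def log_ratio(theta, prior, posterior):
    # log p(theta|x) - log p(theta) = log p(x|theta) - log p(x),
    # i.e. the log-likelihood up to a constant that cancels in T(mu)
    return posterior.log_prob(theta) - prior.log_prob(theta)
```

Working directly with log-probabilities like this also avoids exponentiating and re-dividing small densities, which is what cal_ratio in the code below does before the log is taken again.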
I use the following code to implement this:
CODE:

import time
import numpy as np
import torch
from scipy.optimize import minimize
from sbi.utils import BoxUniform   # imports assumed by the snippet below

def prior_fn(theta):
    # theta[-2] is the mixture weight p; if a uniform draw exceeds it,
    # pin the five feature parameters to fixed values (theta is modified in place)
    p = theta[-2]
    g = torch.rand(1)
    if g >= p:
        theta[23] = -9.9
        theta[24] = -5.9
        theta[25] = 14.9
        theta[26] = 12.9
        theta[27] = 2.9
    return theta
def calc_LLR(q):
    '''
    estimator_m - the model that estimates the posterior
    runpath_m   - path to the saved model
    LOWERp      - lower bounds of the prior
    UPPERp      - upper bounds of the prior
    theta       - 32 parameters; in the numerator I optimise 31 parameters
                  (technically 26, because I keep the 5 other params from theta[24:29] constant)
    thetai      - initial parameters, the best fit of the model
    simulator   - x = simulator(theta), i.e. a draw from p(x|theta)
    '''
    states_m = torch.load(runpath_m, map_location='cpu')
    estimator_m.load_state_dict(states_m['estimator'])
    prior_m = BoxUniform(torch.tensor(LOWERp).cuda(), torch.tensor(UPPERp).cuda())

    def cal_ratio(th, prior, posterior):
        # posterior(theta|x) / prior(theta) = p(x|theta) / p(x)
        priord = torch.exp(prior.log_prob(th))
        posteriord = torch.exp(posterior.log_prob(th))
        ratio = posteriord / priord
        return ratio.cpu().detach()

    bounds = [(LOWERp[k], UPPERp[k]) for k in range(32)]
    def optimizing_num(prior, posterior, thetai, X0, P0, f0, K0, s0, p0):
        # numerator: only the 26 free parameters are optimised; X, P, f, K, s, p are held fixed
        def objective_function_numerator(theta, X, P, f, K, s, p):  # theta holds the 26 free params
            theta = torch.from_numpy(theta)
            # rebuild the full 32-parameter vector with the fixed values in positions 23:29
            theta_combined = torch.hstack((theta[:-3], torch.Tensor([X, P, f, K, s, p]), theta[-3:]))
            ratio = cal_ratio(theta_combined.float().cuda(), prior, posterior)
            return -np.log(ratio)

        while True:
            start = time.time()
            try:
                # starting point without the fixed parameters (indices 23:29)
                init_theta_num = torch.hstack((thetai[:23], thetai[29:]))
                res_num = minimize(
                    objective_function_numerator,
                    method='Powell',
                    x0=init_theta_num,
                    args=(X0, P0, f0, K0, s0, p0),
                    bounds=np.vstack((bounds[:23], bounds[29:])),
                )
            except ValueError:
                continue
            end = time.time()
            print(' took ', (end - start) / 60, 'min to optimise numerator')
            break
        return res_num
    def optimizing_den(prior, posterior, thetai):
        # denominator: all 32 parameters are optimised
        def objective_function_denominator(theta):
            theta = prior_fn(theta)
            ratio = cal_ratio(torch.from_numpy(theta).float().cuda(), prior, posterior)
            return -np.log(ratio)

        options = {'return_all': False}
        while True:
            start = time.time()
            try:
                init_theta = prior_fn(thetai)
                res_den = minimize(
                    objective_function_denominator,
                    method='Powell',
                    x0=init_theta,
                    bounds=bounds,
                    options=options,
                )
            except ValueError:
                continue
            end = time.time()
            print(' took ', (end - start) / 60, 'min to optimise denominator')
            break
        return res_den
    def making_T(xobs=[torch.from_numpy(x_observation)], num_of_samples=None):
        xx = xobs
        theta_forx = None
        if num_of_samples is not None:
            # draw parameter vectors from the prior (passed through prior_fn) and simulate observations
            theta_forx = torch.vstack([prior_fn(prior_m.sample((1,))[0].cpu()) for _ in range(num_of_samples)])
            xx = simulator(theta_forx)

        mixture_post = {}   # posterior per observation; its construction (presumably from estimator_m) is not shown here
        index = {}
        th_num = {}
        th_den = {}
        ratio_num = {}
        ratio_den = {}
        lr = {}
        x_with_llr = []

        for j, x in enumerate(xx):
            ## sampling from the mixture posterior
            thetam = mixture_post[str(j)].sample(128)
            log_p = mixture_post[str(j)].log_prob(thetam)
            ## finding the most probable theta
            index[str(j)] = np.where(log_p == max(log_p))[0]

            th_num[str(j)] = {}
            th_den[str(j)] = {}
            ratio_num[str(j)] = {}
            ratio_den[str(j)] = {}
            lr[str(j)] = {}

            # values held fixed in the numerator (they match the values prior_fn pins, with p0 = 0)
            X0, P0, f0, K0, s0, p0 = -9.9, -5.9, 14.9, 12.9, 2.9, 0
            len_optim = 1
            print()
            print('sample ', str(j))
            for i in range(len_optim):
                print(' optimising numerator ', str(i), '......')
                optnum = optimizing_num(prior_m, mixture_post[str(j)], thetam[index[str(j)]][0],
                                        X0, P0, f0, K0, s0, p0)
                th_num[str(j)][str(i)] = torch.hstack((torch.from_numpy(optnum.x[:-3]),
                                                       torch.Tensor([X0, P0, f0, K0, s0, p0]),
                                                       torch.from_numpy(optnum.x[-3:])))

                print(' optimising denominator ', str(i), '......')
                th_den[str(j)][str(i)] = torch.from_numpy(optimizing_den(prior_m, mixture_post[str(j)],
                                                                         thetam[index[str(j)]][0]).x)

                ratio_num[str(j)][str(i)] = cal_ratio(th_num[str(j)][str(i)].float().cuda(), prior_m,
                                                      mixture_post[str(j)])
                ratio_den[str(j)][str(i)] = cal_ratio(th_den[str(j)][str(i)].float().cuda(), prior_m,
                                                      mixture_post[str(j)])
                # T = log(ratio_num / ratio_den) = log p(x|theta_num) - log p(x|theta_den)
                lr[str(j)][str(i)] = torch.log(ratio_num[str(j)][str(i)] / ratio_den[str(j)][str(i)])
            print()
            print()
            x_with_llr.append(j)

        return theta_forx, xx, lr, x_with_llr
    theta_forx, xx, lr_list, x_with_llr = making_T(num_of_samples=20)
Resulting plots:
[plot 1](https://i.sstatic.net/GPtv3XnQ.png)
[plot 2](https://i.sstatic.net/12ZdSW3L.png)
I get a chi2 with 19 degrees of freedom, and an almost uniform distribution for the second model, but I would expect a chi2 with 1 degree of freedom. What am I doing wrong?
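For reference, a minimal sketch of how the collected T values can be compared against a chi2 law (illustrative only, not the exact code behind the plots above; it assumes lr_list as returned by making_T and uses scipy.stats):

```python
import numpy as np
from scipy import stats

# Flatten the per-sample, per-iteration log-ratios returned by making_T
# (lr_list is a dict of dicts holding single-element tensors).
t_values = np.array([float(lr_list[j][i]) for j in lr_list for i in lr_list[j]])

# Fit a chi-square distribution (location and scale fixed) and read off the
# degrees of freedom, then compare against the expected chi2 with 1 dof.
df_fit, _, _ = stats.chi2.fit(t_values, floc=0, fscale=1)
print('fitted degrees of freedom:', df_fit)
print(stats.kstest(t_values, stats.chi2(df=1).cdf))
```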