import torch
import torch.nn as nn
from torch import autograd

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Small MLP trained to approximate Bernoulli sampling from (probability, random number) inputs
class BernoulliApproximator(nn.Module):
    def __init__(self, hidden_dim):
        super().__init__()
        self.linear1 = nn.Linear(2, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, hidden_dim)
        self.linear3 = nn.Linear(hidden_dim, hidden_dim)
        self.linear4 = nn.Linear(hidden_dim, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.relu(self.linear1(x))
        out = self.relu(self.linear2(out))
        out = self.relu(self.linear3(out))
        out = torch.sigmoid(self.linear4(out))
        return out

# Pretrained approximator, loaded as a full model object
model = torch.load('bernoullimodel9.pth', map_location=device)
class BernoulliSampleFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, probabilities, random_numbers):
        result = torch.zeros_like(probabilities)
        inputs = []
        outputs = []
        for i in range(probabilities.shape[1]):
            with torch.enable_grad():
                # One (probability, random number) pair per column, tracked for the backward pass
                input = torch.cat((probabilities[:, i].unsqueeze(1).double(),
                                   random_numbers[:, i].unsqueeze(1).double()),
                                  dim=1).clone().requires_grad_(True)
                inputs.append(input)
                output = model(input)
                outputs.append(output)
                result[:, i] = output.squeeze().detach()
        inputLength = torch.tensor(len(inputs))
        inputs.extend(outputs)
        ctx._dict = model.state_dict()
        ctx.save_for_backward(inputLength, *inputs)
        return result
    @staticmethod
    def backward(ctx, grad_output):
        print("grad_output: ", grad_output)
        inputLength, *inputs = ctx.saved_tensors
        outputs = inputs[inputLength:]
        inputs = inputs[:inputLength]
        toReturn = torch.zeros_like(grad_output)
        toReturn2 = torch.zeros_like(grad_output)
        torch.set_grad_enabled(True)
        with torch.enable_grad():
            # Rebuild the approximator so autograd.grad has a live graph to differentiate
            model = BernoulliApproximator(32)
            # model.load_state_dict(ctx._dict)  # restoring the trained weights here is what makes the gradients come back as zero
            for param in model.parameters():
                print("PARAM 1: ", param)
            for i in range(toReturn.shape[1]):
                input = inputs[i].float()
                print("input: ", input)
                output = model(input)
                print("output: ", output)
                delta, *g_pars = autograd.grad(output,
                                               [input] + list(model.parameters()),
                                               grad_output[:, i].unsqueeze(1).requires_grad_(),
                                               allow_unused=True)
                print("delta: ", delta)
                toReturn[:, i] = delta[:, 0]
                toReturn2[:, i] = delta[:, 1]
        print("toReturn: ", toReturn)
        return toReturn, toReturn2
I am trying to get the gradients with respect to the input of model by recreating the model inside torch.enable_grad(), feeding it the same inputs, and then calling autograd.grad. autograd.grad works correctly only when the model has just been initialized; as soon as I change the values of its weights/biases, the returned gradient is 0, no matter what. I've tried every method I know of for copying the trained model's weights and biases into the new instance: .copy_() inside torch.no_grad(), and param.data = savedParamTensor (see the sketch below). Calling .copy_() directly, outside of no_grad, raises an in-place modification error. Calling autograd.grad on the saved output instead gives the same result.
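To make the copying attempts concrete, this is roughly what I tried (a simplified sketch; fresh_model and trained_model are placeholder names, with trained_model standing in for the instance loaded from bernoullimodel9.pth):

# Attempt 1: copy the trained weights under no_grad
fresh_model = BernoulliApproximator(32)
with torch.no_grad():
    for fresh_p, trained_p in zip(fresh_model.parameters(), trained_model.parameters()):
        fresh_p.copy_(trained_p)

# Attempt 2: overwrite .data directly
for fresh_p, trained_p in zip(fresh_model.parameters(), trained_model.parameters()):
    fresh_p.data = trained_p.data.clone()

# After either attempt, autograd.grad on fresh_model's output gives me all-zero input gradients.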
autograd.grad only works with a freshly initialized model; it does not seem possible to change the weights and have the computational graph update accordingly, even if I change them inside the __init__ call.
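For comparison, here is the bare pattern I expect to work, stripped of the custom Function (a minimal self-contained sketch using the BernoulliApproximator class above; net, x, y, and dydx are just illustrative names):

import torch
from torch import autograd

net = BernoulliApproximator(32)           # freshly initialized -> input gradients come out nonzero
x = torch.rand(5, 2, requires_grad=True)  # (probability, random number) pairs
y = net(x)

(dydx,) = autograd.grad(y, x, grad_outputs=torch.ones_like(y))
print(dydx)  # nonzero here, but all zeros once the trained weights are copied into net

This is the same call pattern used inside backward above, just without the saved tensors and the per-column loop.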