I wrote a neural network in PyTorch using the DQN algorithm but got stuck on the fact that the model is not trained
Training statistics, axis-x is number of frames that the neural network lasted
class ReplayBuffer:
def __init__(self, capacity):
self.memory = deque([], maxlen=capacity)
def push(self, data):
self.memory.append(data)
def sample_data(self):
batch = sample(self.memory, BATCH_SIZE)
batch = zip(*batch)
batch = [i for i in batch]
return batch
def __len__(self):
return len(self.memory)
class Deep_Q_Network(nn.Module):
def __init__(self):
super().__init__()
self.model = nn.Sequential(
nn.Linear(INPUTS[0], 128),
nn.ReLU(),
nn.Linear(128, 128),
nn.ReLU(),
nn.Linear(128, OUTPUTS)
)
self.loss_fn = nn.SmoothL1Loss()
self.optimizer = optim.AdamW(self.parameters(), lr=0.001)
self.eps = EPS_START
def forward(self, X):
return self.model(X)
def Act(self, state):
self.eps = self.eps - EPS_DECAY if self.eps > EPS_END else self.eps
if self.eps > random():
action = randint(0, 2, (1,))
return action
else:
return self.model(state)
def Education(self, Memory, Q_network):
if len(Memory) < BATCH_SIZE:
return
s, a, r, s1 = Memory.sample_data()
s, a, r, s1 = np.array(s), np.array(a), np.array(r), np.array(s1)
s, a, r, s1 = tensor(s), tensor(a), tensor(r), tensor(s1)
with torch.no_grad():
q_target = r + 0.99 * Q_network(s1).max(1).values
action = self.model(s).gather(1, a.unsqueeze(0))
self.optimizer.zero_grad()
loss = self.loss_fn(action.squeeze(), q_target)
loss.backward()
self.optimizer.step()
target_net_state_dict = Q_network.state_dict()
policy_net_state_dict = self.state_dict()
for key in policy_net_state_dict:
target_net_state_dict[key] = policy_net_state_dict[key]*TAU + target_net_state_dict[key]*(1-TAU)
Q_network.load_state_dict(target_net_state_dict)
def Reset(self):
for layer in self.model.children():
if hasattr(layer, 'reset_parameters'):
layer.reset_parameters()
I experimented with error functions, optimizers, the number of hidden layers, the number of neurons, the epsilon, and asked ChatGPT for advice.
New contributor
Aiden is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.