While training my neural network, I observe that GPU utilization stays at 0% while CPU utilization sits at 100%.
If I add .to(device), I get the following error:
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument index in method wrapper_CUDA_gather)
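From the message, the failure seems to come from the gather call: the Q-network (and therefore its output) is on cuda:0, while the index tensor built from the sampled actions is still on the CPU. Roughly, I think the situation looks like this (a hypothetical sketch, not my exact change):

    # Hypothetical illustration of the mismatch: the model output is on cuda:0,
    # but action_indices was built from CPU tensors, so gather() sees two devices
    # and raises the RuntimeError above.
    current_q = dqn(states.float().to(device)).gather(1, action_indices.unsqueeze(1)).squeeze(1)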
My DQN model training code is as follows:
def train_dqn(dqn, target_dqn, optimizer, replay_buffer, num_episodes, batch_size, gamma,
              epsilon, epsilon_min, epsilon_decay, risk_coefficient_list, episode_rewards,
              pbar, loss_list, resource_consumed_list):
    global resource_consumed, risk_coefficient
    for episode in range(num_episodes):
        state = state_reset()
        done = False
        episode_reward = 0
        scarce_list = []
        for i in range(len(state) - 1):
            scarce_list.append(state[i] * 0.8)
        while not done:
            actions = take_action(state, epsilon, dqn)
            next_state, done = environment_step(state, actions)
            reward, risk_coefficient, resource_consumed = get_reward(next_state, next_state, actions, scarce_list)
            replay_buffer.push(state, actions, reward, next_state, done)
            state = next_state
            episode_reward += reward
            if len(replay_buffer) > batch_size:
                states, actions, rewards, next_states, dones = replay_buffer.sample(batch_size)
                action_indices = torch.multinomial(actions, 1).squeeze(1).long()
                current_q = dqn(states.float()).gather(1, action_indices.unsqueeze(1)).squeeze(1)
                next_q = target_dqn(next_states.float()).max(1)[0].detach()
                expected_q = rewards + gamma * next_q * (~dones)
                loss = nn.functional.mse_loss(current_q, expected_q.float())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
        epsilon = max(epsilon_min, epsilon_decay * epsilon)
        if episode % 10 == 0:
            target_dqn.load_state_dict(dqn.state_dict())
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
dqn = DQN(state_size_dim, action_size_dim).to(device)
target_dqn = DQN(state_size_dim, action_size_dim).to(device)
target_dqn.load_state_dict(dqn.state_dict())
optimizer = optim.Adam(dqn.parameters())

with tqdm(total=int(num_episodes), desc=f'{algorithm_name.upper()} Iteration %d' % i) as pbar:
    train_dqn(dqn, target_dqn, optimizer, replay_buffer, num_episodes, batch_size,
              gamma, epsilon, epsilon_min, epsilon_decay,
              risk_coefficient_list,
              episode_rewards, pbar, loss_list, resource_consumed_list)
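I suspect the root cause is that the tensors coming out of replay_buffer.sample() (and the state fed to take_action) never leave the CPU, while dqn and target_dqn are on the GPU. Below is a minimal sketch of the kind of change I think is needed inside the training step; device is assumed to be visible inside train_dqn (e.g. passed in as an extra argument), and the tensor names match my code above:

    # Hypothetical sketch: move the whole sampled batch onto the same device as
    # the networks before computing Q-values, so gather() and mse_loss() only
    # ever see tensors on cuda:0.
    states, actions, rewards, next_states, dones = replay_buffer.sample(batch_size)
    states = states.float().to(device)
    actions = actions.to(device)
    rewards = rewards.float().to(device)
    next_states = next_states.float().to(device)
    dones = dones.to(device)

    action_indices = torch.multinomial(actions, 1).squeeze(1).long()  # created on `device`
    current_q = dqn(states).gather(1, action_indices.unsqueeze(1)).squeeze(1)
    next_q = target_dqn(next_states).max(1)[0].detach()
    expected_q = rewards + gamma * next_q * (~dones)
    loss = nn.functional.mse_loss(current_q, expected_q.float())

Is this the right way to do it, or should the conversion happen inside the replay buffer itself?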