I am trying to build a Q-learning agent that solves a maze, using a reward system where hitting a wall is -1 and reaching the goal is 100. However, when I reach the goal, my total reward is always 100 regardless of how many walls I hit. How do I fix this? I also tried adding print statements where the values are updated, but they never printed, and I am not sure why.
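For context, this is the kind of per-episode bookkeeping I expect to see, written as a minimal sketch; total_reward is a hypothetical name and is not in my actual code below, which only uses my own q_learning_choice and Evironment functions:

# Sketch only: accumulate the reward from every step of one episode
# (total_reward is a hypothetical variable, not in the program below)
total_reward = 0
while not goal:
    action = q_learning_choice(Q_table, state, epsilon)
    n_state, reward = Evironment(state, action, next_rewards, next_states)
    total_reward += reward   # -1 for a wall bump, 100 for the goal
    state = n_state
print("episode total:", total_reward)

My full program is below.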
import random
import time
import copy


# Initializes the Q-table for all positions in the maze
# Each position is given a list of 4 Q-values, corresponding to 4 directions
def Q_table_init(maze):
    Q_table = {}
    for y in range(1, len(maze) - 1):
        for x in range(1, len(maze[0]) - 1):
            pos = (y, x)
            Q_table[pos] = [0.00, 0.00, 0.00, 0.00]
    return Q_table


# Prints the Q-values in a formatted way for easier reading
def print_Qdict(d):
    for key, value in d.items():
        formatted_values = ["{:.2f}".format(num) for num in value]
        print(f"{key}: {formatted_values}")


# General function to print a dictionary in key: value format
def print_dict(d):
    for key, value in d.items():
        print(f"{key}: {value}")


# Reads the maze structure from a file and returns a list of lists representing the maze
def read_maze(file_path):
    with open(file_path, 'r') as file:
        maze = [list(line.strip()) for line in file]
    return maze


# Finds and returns the start position in the maze
def find_start(maze):
    for y, row in enumerate(maze):
        for x, col in enumerate(row):
            if col == 'S':  # Assuming 'S' represents the start position
                return (y, x)
    return None


# Prints the maze layout
def print_maze(maze):
    for row in maze:
        print(' '.join(row))


# Sets up the reward system for the environment based on the maze structure
def Env_rewards(Env):
    rewards = {}
    for i in range(len(Env)):
        for j in range(len(Env[0])):
            pos = (i, j)
            if pos not in rewards:
                rewards[pos] = 0
            if Env[pos[0]][pos[1]] == "G":
                rewards[pos] += 100  # Reward for reaching the goal
                print(rewards)
            elif Env[pos[0]][pos[1]] == "#":
                rewards[pos] -= 1  # Penalty for hitting a wall
                print(rewards)
            elif Env[pos[0]][pos[1]] == " ":
                rewards[pos] += 0  # Neutral reward for empty spaces
    return rewards


# Determines the next possible states from every position in the maze
def Env_next_states(maze, start_pos):
    next_states = {}
    for y in range(1, len(maze) - 1):
        for x in range(1, len(maze[0]) - 1):
            pos = (y, x)
            moves = []
            # up
            if maze[y - 1][x] == 'G' or maze[y - 1][x] == ' ':
                up_move = (y - 1, x)
            else:
                up_move = (y, x)
            moves.append(up_move)
            # down
            if maze[y + 1][x] == 'G' or maze[y + 1][x] == ' ':
                down_move = (y + 1, x)
            else:
                down_move = (y, x)
            moves.append(down_move)
            # left
            if maze[y][x - 1] == 'G' or maze[y][x - 1] == ' ':
                left_move = (y, x - 1)
            else:
                left_move = (y, x)
            moves.append(left_move)
            # right
            if maze[y][x + 1] == 'G' or maze[y][x + 1] == ' ':
                right_move = (y, x + 1)
            else:
                right_move = (y, x)
            moves.append(right_move)
            next_states[pos] = moves
    return next_states


# Returns the next position based on the current position and direction of movement
def reward_pos(pos, direction):
    if direction == "up":
        return (pos[0] - 1, pos[1])
    if direction == "down":
        return (pos[0] + 1, pos[1])
    if direction == "left":
        return (pos[0], pos[1] - 1)
    if direction == "right":
        return (pos[0], pos[1] + 1)


# Simulates the environment's response to an action
# Returns the next state and the associated reward
def Evironment(state, action, env_rewards, env_next_states):
    dir_d = {0: "up", 1: "down", 2: "left", 3: "right"}
    reward = env_rewards[reward_pos(state, dir_d[action])]
    next_state = (env_next_states[state])[action]
    return (next_state, reward)


# Updates the Q-table based on the observed state, reward, and action
# Uses the Q-learning algorithm to update the values
def Q_table_update(Q_table, state, obs, reward, action, alpha, gamma):
    '''
    # Add function code here to update the Q table according the algorithm formula #
    '''
    currentQValue = Q_table[state][action]
    maxQValue = max(Q_table[obs])
    newQValue = currentQValue + alpha * (reward + gamma * maxQValue - currentQValue)
    Q_table[state][action] = newQValue
    return Q_table


# Determines the best action (with the highest Q-value) at a given state
# Randomly chooses between actions if there are multiple best actions
def best_q_action(Q_table, state):
    '''
    # Add function code here to choose the best action according to the Qtable #
    '''
    actions = Q_table[state]
    maxQValue = max(actions)
    bestActions = [i for i, q in enumerate(actions) if q == maxQValue]
    return random.choice(bestActions)


# Chooses an action based on the epsilon-greedy strategy
# Randomly explores with a probability of epsilon, otherwise exploits the best action
def q_learning_choice(Q_table, state, epsilon):
    '''
    # Add function code here to implement epsilon greedy policy #
    '''
    if random.random() > epsilon:
        return random.randint(0, 3)
    else:
        return best_q_action(Q_table, state)


# Main control loop to move agent in the maze
def manual_action():
    '''
    # Add function code here #
    '''
    while True:
        try:
            userInputDirections = int(
                input("Enter a number between 0 and 3, 0 is up, 1 is down, 2 is left and 3 is right"))
            if 0 <= userInputDirections <= 3:
                print(userInputDirections)
                return userInputDirections
            else:
                print("try again and this time enter a valid number")
        except ValueError:
            print("Invalid input. Please enter an integer.")


def random_action():
    return random.randint(0, 3)


def Initialise_Env(maze_file):
    maze_env = read_maze(maze_file)
    start_pos = find_start(maze_env)
    maze_env[start_pos[0]][start_pos[1]] = ' '
    next_rewards = Env_rewards(maze_env)
    next_states = Env_next_states(maze_env, start_pos)
    return maze_env, start_pos, next_rewards, next_states


# Main function to execute the Q-learning algorithm
# Set Q learning parameters like alpha, gamma, epsilon
# Initialize the environment and Q-table
# Run the Q-learning algorithm over a number of episodes
# For each episode, execute actions until the goal is reached
# Update the Q-table based on the observed rewards
def main():
    # Q learning parameters
    alpha = 0.2  # learning rate
    gamma = 0.9  # discount factor
    epsilon = 0.9  # exploration factor
    number_episodes = 1  # Number of times the simulation is run
    quick_training_ep = 0  # The number of episodes to run quickly before showing the behaviour. I would recommend setting this to a few less than the number of episodes when complete
    pause_time = 1  # Number of seconds between steps for final episodes to view Agent behaviour

    maze_file = '/Users/arnavgupta/Downloads/maze_env1.txt'  # change path name to desired maze

    # Initialise Environment from files
    maze_env, start_pos, next_rewards, next_states = Initialise_Env(maze_file)
    state = start_pos

    # Initialise Q_table for Agent
    Q_table = Q_table_init(maze_env)

    # Show Environment tables
    print("Environment Dictionaries")
    print_dict(next_states)
    print_dict(next_rewards)

    for episode in range(number_episodes):
        # Flag to run while loop until goal is reached
        goal = False
        # Optionally print the Q_table after each episode
        print_Qdict(Q_table)
        while not goal:
            # Choose an action according to the q_learning epsilon greedy policy
            # This is initialised as a random move policy
            action = q_learning_choice(Q_table, state, epsilon)
            action = manual_action()
            action = random_action()
            # Evironment(state, action, env_rewards, env_next_states)
            n_state, reward = Evironment(state, action, next_rewards, next_states)
            # Update Q_table for the current state with new reward and new state information
            Q_table = Q_table_update(Q_table, state, n_state, reward, action, alpha, gamma)
            # Update state to new state
            state = n_state
            if maze_env[n_state[0]][n_state[1]] != "G":
                maze_env_pos = copy.deepcopy(maze_env)
                maze_env_pos[state[0]][state[1]] = "A"
                print_maze(maze_env_pos)
                time.sleep(pause_time)
            # If reward is 100 then the goal state has been reached
            if maze_env[n_state[0]][n_state[1]] == "G":
                print("Congratulations. Goal state found")
                print("Episode: ", episode)
                dir_d = {0: "up", 1: "down", 2: "left", 3: "right"}
                print(f"agent makes action {dir_d[action]} and recieves observation {n_state} and reward {reward}")
                goal = True
                print_maze(maze_env)


if __name__ == "__main__":
    main()
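For reference, this is the update rule I am implementing in Q_table_update, written out as a standalone check with made-up numbers (this snippet is separate from the program above):

# Standalone check of the Q-learning update used in Q_table_update:
# Q(s,a) <- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
alpha, gamma = 0.2, 0.9
current_q = 0.0                  # Q(s, a) before the update
next_qs = [0.0, 0.0, -1.0, 0.0]  # Q-values of the next state s'
reward = -1                      # wall penalty
new_q = current_q + alpha * (reward + gamma * max(next_qs) - current_q)
print(new_q)                     # prints -0.2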