I am working on a project where I have a Gym environment with a discrete action space and a continuous observation space, similar to the CartPole environment from Gym. I am trying to convert the discrete action space to a continuous one so that the environment will work with model-based RL algorithms that only support continuous actions and observations. What is the best way to do this? Thank you in advance.
class SimpleEnv(gym.Env):
    """Minimal Gym environment with a Discrete(3) action space and a
    one-dimensional continuous observation in [-1, 1].

    The agent's goal is to drive the scalar state toward zero: reward is
    the negative L2 norm of the state, and an episode ends once the norm
    drops below 0.1.
    """

    def __init__(self):
        super().__init__()
        self.action_space = spaces.Discrete(3)
        # BUG FIX: spaces.Box requires an explicit `shape` when low/high
        # are scalars; shape=(1,) matches the state produced by reset().
        self.observation_space = spaces.Box(
            low=-1.0, high=1.0, shape=(1,), dtype=np.float32
        )
        # BUG FIX: the original initialized a size-4 state while reset()
        # produced a size-(1,) state; keep both consistent at shape (1,).
        self.state = np.zeros(1, dtype=np.float32)

    def reset(self):
        """Reset the state to a uniform random value in [-0.5, 0.5].

        Returns:
            np.ndarray: the new state, shape (1,), dtype float32.
        """
        # Cast to float32 so the returned observation matches the dtype
        # declared by observation_space (np.random.uniform yields float64).
        self.state = np.random.uniform(low=-0.5, high=0.5, size=(1,)).astype(
            np.float32
        )
        return self.state

    def step(self, action):
        """Apply one discrete action and advance the environment.

        Args:
            action: 0 increments the state by 0.1, 1 decrements it by 0.1,
                2 negates it. Any other value leaves the state unchanged.

        Returns:
            tuple: (observation, reward, done, info) where reward is the
            negative L2 norm of the state and done is True once that norm
            falls below 0.1.
        """
        if action == 0:
            self.state += 0.1
        elif action == 1:
            self.state -= 0.1
        elif action == 2:
            self.state = -self.state
        # Cast to plain float/bool: Gym wrappers and vectorized envs expect
        # native Python types, not numpy scalars (np.bool_ is not bool).
        reward = -float(np.linalg.norm(self.state))
        done = bool(np.linalg.norm(self.state) < 0.1)
        info = {}
        return self.state, reward, done, info