I am using the approach below to create a simple deep-RL model, but I am getting this error:
ValueError: Error when checking input: expected flatten_1_input to have 2 dimensions, but got array with shape (1, 1, 2)
I am using the following versions of Python and the libraries:
Python: 3.11.7
TensorFlow: 2.13.0
Keras: 2.13.1
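From the traceback, I believe the (1, 1, 2) shape comes from keras-rl's SequentialMemory: with window_length=1 it stacks observations along a window axis, so the network receives batches of shape (batch, window_length, obs_dim) rather than the flat (2,) state my environment returns. A minimal illustration of the mismatch, using my first observation:

import numpy as np

obs = np.array([-0.062030, -0.133681], dtype=np.float32)  # env state: shape (2,)
batch = obs[None, None, :]  # keras-rl adds batch and window axes -> shape (1, 1, 2)
print(batch.ndim)           # 3 dimensions, but Flatten(input_shape=(2,)) expects 2

The full script is below.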
import numpy as np
import pandas as pd
import gym
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
from tensorflow.keras.optimizers.legacy import Adam
class CustomEnv(gym.Env):
    def __init__(self, df):
        super(CustomEnv, self).__init__()
        self.df = df
        self.action_space = gym.spaces.Discrete(1)  # Action space (predict F_1_d_returns)
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32)  # State space (1_d_returns, 2_d_returns)
        self.current_step = 0

    def reset(self):
        # Reset the environment to initial state
        self.current_step = 0
        self.state = self.df.iloc[self.current_step, 1:3].values  # Start with first row's 1_d_returns and 2_d_returns
        return self.state

    def step(self, action):
        # Take an action (not relevant here as we are predicting)
        self.current_step += 1
        done = self.current_step >= len(self.df) - 1
        if done:
            next_state = self.state
        else:
            next_state = self.df.iloc[self.current_step, 1:3].values
        # Flatten the next_state
        next_state = np.reshape(next_state, (-1,))
        reward = 0  # No reward for predicting
        info = {}  # Additional information (if needed)
        return next_state, reward, done, info
df = pd.DataFrame({
    'F_1_d_returns': [-0.038076, 0.083333, 0.060577, -0.013599, -0.020221],
    '1_d_returns': [-0.062030, -0.038076, 0.083333, 0.060577, -0.013599],
    '2_d_returns': [-0.133681, -0.097744, 0.042084, 0.148958, 0.046154]
})
env = CustomEnv(df)
states = env.observation_space.shape
actions = env.action_space.n
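As a sanity check (this is not part of the original script), the environment itself does return a flat state, so the extra dimensions must be added somewhere later:

print(env.reset().shape)  # (2,)
print(states, actions)    # (2,) 1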
def build_model(input_shape, nb_actions):
    model = Sequential()
    model.add(Flatten(input_shape=input_shape))  # Adjust input shape here
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))
    return model
model = build_model(states, actions)
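For comparison, the network built from states=(2,) expects 2-D batches, which matches what the error message says:

print(model.input_shape)  # (None, 2), so a (1, 1, 2) batch is rejected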
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return dqn
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)
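Based on keras-rl's CartPole example, I tried prepending the memory window axis to the model's input shape, which makes the network accept the (1, 1, 2) batches, but I would like to confirm whether this is the correct fix or just a workaround. Sketch of my attempt:

def build_model(input_shape, nb_actions):
    model = Sequential()
    # Include the window_length axis (1 here), so input batches are (batch, 1, 2)
    model.add(Flatten(input_shape=(1,) + input_shape))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))
    return model

model = build_model(states, actions)

Why does the original model fail with this error, and is prepending the window axis the right way to handle it?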