I'm trying to apply Tensorflow PPO Agent to a biopedal robot model to learn walking.
I suppose the input, which I suppose is the observation_spec
, has 12 dimensions but I don't know why ndim
is found to be 1. Am I missing something..?
class BipedPPoEnv(py_environment.PyEnvironment):
def __init__(self):
self._action_spec = array_spec.BoundedArraySpec(
shape=(), dtype=np.float32, minimum=-1.5, maximum=1.5, name='action')
self._observation_spec = array_spec.BoundedArraySpec(
shape=(12,), dtype=np.float32, name='observation')
...
def create_networks(tf_env):
actor_net = ActorDistributionRnnNetwork(
tf_env.observation_spec(),
tf_env.action_spec(),
input_fc_layer_params=None,
lstm_size=(128, 128),
output_fc_layer_params=None,
activation_fn=None)
value_net = ValueRnnNetwork(
tf_env.observation_spec(),
input_fc_layer_params=None,
lstm_size=(128, 128),
output_fc_layer_params=None,
activation_fn=None)
return actor_net, value_net
...
if __name__ == '__main__':
tf_env = tf_py_environment.TFPyEnvironment(BiopedPPOenv)
learning_rate = 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
actor_net, value_net = create_networks(tf_env)
agent = ppo_agent.PPOAgent(
tf_env.time_step_spec(),
tf_env.action_spec(),
optimizer=optimizer, # need to define
actor_net=actor_net, # need to define
value_net=value_net, # need to define
num_epochs=10,
gradient_clipping=0.5,
entropy_regularization=1e-2,
importance_ratio_clipping=0.2,
use_gae=True,
use_td_lambda_return=True
)
And the error code is as follows:
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/input_spec.py:193 assert_input_compatibility
str(x.shape.as_list()))
ValueError: Input 0 of layer bias_layer is incompatiblewith the layer: : expected min_ndim=2, found ndim=1. Full shape received: [0]