at this line:

```python
model = PPO("MlpPolicy", env, verbose=1, tensorboard_log=log_path)
```

It raises an error:

```
AttributeError Traceback (most recent call last)
Cell In[200], line 2
1 log_path = '/Users/mafaz2/Desktop/open cv test/Training/Logs'
----> 2 model = PPO("MlpPolicy", env, verbose=1, tensorboard_log=log_path)
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/stable_baselines3/ppo/ppo.py:102, in PPO.__init__(self, policy, env, learning_rate, n_steps, batch_size, n_epochs, gamma, gae_lambda, clip_range, clip_range_vf, normalize_advantage, ent_coef, vf_coef, max_grad_norm, use_sde, sde_sample_freq, target_kl, tensorboard_log, policy_kwargs, verbose, seed, device, _init_setup_model)
75 def __init__(
76 self,
77 policy: Union[str, Type[ActorCriticPolicy]],
(...)
99 _init_setup_model: bool = True,
100 ):
--> 102 super().__init__(
103 policy,
104 env,
105 learning_rate=learning_rate,
106 n_steps=n_steps,
107 gamma=gamma,
108 gae_lambda=gae_lambda,
109 ent_coef=ent_coef,
110 vf_coef=vf_coef,
111 max_grad_norm=max_grad_norm,
112 use_sde=use_sde,
113 sde_sample_freq=sde_sample_freq,
114 tensorboard_log=tensorboard_log,
115 policy_kwargs=policy_kwargs,
116 verbose=verbose,
117 device=device,
118 seed=seed,
119 _init_setup_model=False,
120 supported_action_spaces=(
121 spaces.Box,
122 spaces.Discrete,
123 spaces.MultiDiscrete,
124 spaces.MultiBinary,
125 ),
126 )
128 # Sanity check, otherwise it will lead to noisy gradient and NaN
129 # because of the advantage normalization
130 if normalize_advantage:
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/stable_baselines3/common/on_policy_algorithm.py:76, in OnPolicyAlgorithm.__init__(self, policy, env, learning_rate, n_steps, gamma, gae_lambda, ent_coef, vf_coef, max_grad_norm, use_sde, sde_sample_freq, tensorboard_log, monitor_wrapper, policy_kwargs, verbose, seed, device, _init_setup_model, supported_action_spaces)
53 def __init__(
54 self,
55 policy: Union[str, Type[ActorCriticPolicy]],
(...)
73 supported_action_spaces: Optional[Tuple[spaces.Space, ...]] = None,
74 ):
---> 76 super().__init__(
77 policy=policy,
78 env=env,
79 learning_rate=learning_rate,
80 policy_kwargs=policy_kwargs,
81 verbose=verbose,
82 device=device,
83 use_sde=use_sde,
84 sde_sample_freq=sde_sample_freq,
85 support_multi_env=True,
86 seed=seed,
87 tensorboard_log=tensorboard_log,
88 supported_action_spaces=supported_action_spaces,
89 )
91 self.n_steps = n_steps
92 self.gamma = gamma
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/stable_baselines3/common/base_class.py:160, in BaseAlgorithm.__init__(self, policy, env, learning_rate, policy_kwargs, tensorboard_log, verbose, device, support_multi_env, monitor_wrapper, seed, use_sde, sde_sample_freq, supported_action_spaces)
158 if env is not None:
159 env = maybe_make_env(env, self.verbose)
--> 160 env = self._wrap_env(env, self.verbose, monitor_wrapper)
162 self.observation_space = env.observation_space
163 self.action_space = env.action_space
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/stable_baselines3/common/base_class.py:209, in BaseAlgorithm._wrap_env(env, verbose, monitor_wrapper)
207 if verbose >= 1:
208 print("Wrapping the env in a DummyVecEnv.")
--> 209 env = DummyVecEnv([lambda: env])
211 # Make sure that dict-spaces are not nested (not supported)
212 check_for_nested_spaces(env.observation_space)
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/stable_baselines3/common/vec_env/dummy_vec_env.py:27, in DummyVecEnv.__init__(self, env_fns)
25 def __init__(self, env_fns: List[Callable[[], gym.Env]]):
26 self.envs = [fn() for fn in env_fns]
---> 27 if len(set([id(env.unwrapped) for env in self.envs])) != len(self.envs):
28 raise ValueError(
29 "You tried to create multiple environments, but the function to create them returned the same instance "
30 "instead of creating different objects. "
(...)
35 "Please read https://github.com/DLR-RM/stable-baselines3/issues/1151 for more information."
36 )
37 env = self.envs[0]
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/stable_baselines3/common/vec_env/dummy_vec_env.py:27, in <listcomp>(.0)
25 def __init__(self, env_fns: List[Callable[[], gym.Env]]):
26 self.envs = [fn() for fn in env_fns]
---> 27 if len(set([id(env.unwrapped) for env in self.envs])) != len(self.envs):
28 raise ValueError(
29 "You tried to create multiple environments, but the function to create them returned the same instance "
30 "instead of creating different objects. "
(...)
35 "Please read https://github.com/DLR-RM/stable-baselines3/issues/1151 for more information."
36 )
37 env = self.envs[0]
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/gym/core.py:238, in Wrapper.__getattr__(self, name)
234 if name.startswith("_"):
235 raise AttributeError(
236 "attempted to get missing private attribute '{}'".format(name)
237 )
--> 238 return getattr(self.env, name)
AttributeError: 'function' object has no attribute 'unwrapped'
```
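If I read the traceback correctly, `DummyVecEnv` calls each entry of `env_fns` and then accesses `env.unwrapped`, so my guess is that somewhere `env` ends up being a function rather than an environment instance. A small sketch of what I mean, using CartPole purely for illustration (this is my assumption about the cause, not something I have confirmed):

```python
import gym

def make_env():
    return gym.make("CartPole-v1")

# If the factory itself is passed along (no call), the object that ends up
# being checked is a plain function, which has no `unwrapped` attribute:
env = make_env
print(hasattr(env, "unwrapped"))   # False -- matches the AttributeError above

# An actual environment instance does have it:
env = make_env()
print(hasattr(env, "unwrapped"))   # True
```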
I tried building a class:

```python
class shower:
    def __init__(self):
        self.Action_Space = Discrete(3)
        self.observation_space = Box(low=0, high=100, shape=(1,))
        self.state = 38 + random.randint(-3, 3)
        self.shower_length = 60

    def step(self, action):
        self.state += action - 1
        self.shower_length -= 1
        if self.state >= 37 and self.state <= 39:
            reward = 1
        else:
            reward = -1
        if self.shower_length <= 0:
            done = True
        else:
            done = False
        info = {}
        return self.state, reward, done, info

    def render(self):
        pass

    def reset(self):
        self.state = np.array([38 + random.randint(-3, 3)]).astype(float)
        self.shower_length = 60
        return self.state
```
and training PPO on that, but it also resulted in an error.
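For reference, from the SB3 custom-environment guide I gather the environment is expected to subclass `gym.Env` and expose a lowercase `action_space` / `observation_space`. Below is my reading of that, rewritten as a sketch (I renamed the class `ShowerEnv` here, and it is untested), so it may well not be the actual cause of either error:

```python
import random
import numpy as np
import gym
from gym.spaces import Discrete, Box

# Sketch of what I *think* the docs expect: subclass gym.Env, lowercase
# action_space/observation_space, and observations as a float32 array
# matching the Box shape. Not tested, so it may miss something.
class ShowerEnv(gym.Env):
    def __init__(self):
        self.action_space = Discrete(3)
        self.observation_space = Box(low=0, high=100, shape=(1,), dtype=np.float32)
        self.state = np.array([38 + random.randint(-3, 3)], dtype=np.float32)
        self.shower_length = 60

    def step(self, action):
        self.state += action - 1        # action in {0, 1, 2} -> change of -1, 0, +1
        self.shower_length -= 1
        reward = 1 if 37 <= self.state[0] <= 39 else -1
        done = self.shower_length <= 0
        return self.state, reward, done, {}

    def reset(self):
        self.state = np.array([38 + random.randint(-3, 3)], dtype=np.float32)
        self.shower_length = 60
        return self.state

    def render(self, mode="human"):
        pass
```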