I created a custom environment (I checked it through the check_env() function provided by Stable baseline and it's fine). This is the code I use to start the training with DQN
from stable_baselines3 import DQN
from stable_baselines3.common.env_checker import check_env as sb3_check_env
import torch
import wandb
from wandb.integration.sb3 import WandbCallback
from gym.version import VERSION
from utils.clustering_phase import load_merged_dataframe
from utils.constants import DATASET_PREDICTION_PATH
from environments.ns_env import NS3OfflineEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
if __name__ == "__main__":
# Set pytorch num threads to 1 for faster training.
torch.set_num_threads(1)
WANDB_NOTEBOOK_NAME = 'ES' #my notebook
WANDB_API_KEY= '1234567890' #my KEY (not showing the real one)
vec_env = OfflineEnv(data="observation_df.csv")
verbosity=2
config = {
"policy_type": "MlpPolicy",
"total_timesteps": 100
}
name='100-dqn'
run = wandb.init(
project="ProjectName",
config=config,
sync_tensorboard=True,
monitor_gym=True,
name=name
)
model = DQN(policy=config["policy_type"], env=vec_env,
batch_size=256,
verbose=verbosity,
tensorboard_log=f"wandb/runs/{name}{run.id}",
)
model.learn(
total_timesteps=config["total_timesteps"],
callback=[WandbCallback(
gradient_save_freq=100,
model_save_freq=100,
model_save_path=f"./models/ppo/{name}{run.id}",
verbose=verbosity)]
)
run.finish()
And from the moment I execute the Python file I get the following
Moreover, on WanDB I'm only getting the "System plots" and nothing about training, so I suppose the training doesn't start at all
What is wrong in my training code?