
I'm new to Julia and JuliaReinforcementLearning and just want to run the experiments provided at https://juliareinforcementlearning.org/docs/experiments/

So I made one file like this:

using ReinforcementLearning
using StableRNGs
using Flux
using Flux.Losses

function RL.Experiment(
    ::Val{:JuliaRL},
    ::Val{:BasicDQN},
    ::Val{:CartPole},
    ::Nothing;
    seed = 123,
)
    rng = StableRNG(seed)
    env = CartPoleEnv(; T = Float32, rng = rng)
    ns, na = length(state(env)), length(action_space(env))

    policy = Agent(
        policy = QBasedPolicy(
            learner = BasicDQNLearner(
                approximator = NeuralNetworkApproximator(
                    model = Chain(
                        Dense(ns, 128, relu; init = glorot_uniform(rng)),
                        Dense(128, 128, relu; init = glorot_uniform(rng)),
                        Dense(128, na; init = glorot_uniform(rng)),
                    ) |> gpu,
                    optimizer = ADAM(),
                ),
                batch_size = 32,
                min_replay_history = 100,
                loss_func = huber_loss,
                rng = rng,
            ),
            explorer = EpsilonGreedyExplorer(
                kind = :exp,
                ϵ_stable = 0.01,
                decay_steps = 500,
                rng = rng,
            ),
        ),
        trajectory = CircularArraySARTTrajectory(
            capacity = 1000,
            state = Vector{Float32} => (ns,),
        ),
    )
    stop_condition = StopAfterStep(10_000, is_show_progress=!haskey(ENV, "CI"))
    hook = TotalRewardPerEpisode()
    Experiment(policy, env, stop_condition, hook, "# BasicDQN <-> CartPole")
end

I named this file "JuliaRL_BasicDQN_CartPole.jl"

and a second file like this:

include("JuliaRL_BasicDQN_CartPole.jl")
using Plots
pyplot() 
ex = E`JuliaRL_BasicDQN_CartPole`
run(ex)
plot(ex.hook.rewards)
savefig("assets/JuliaRL_BasicDQN_CartPole.png") #hide

named "test.jl". (-> one question: what does Exxx exactly mean??)

The experiment seems to start; it shows this text:

BasicDQN <-> CartPole
≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡

But then it stops with this error message:

LoadError: UndefVarError: params not defined
Stacktrace:
  [1] update!(learner::BasicDQNLearner{NeuralNetworkApproximator{Chain{Tuple{Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Adam}, typeof(huber_loss), StableRNGs.LehmerRNG}, batch::NamedTuple{(:state, :action, :reward, :terminal, :next_state), Tuple{Matrix{Float32}, Vector{Int64}, Vector{Float32}, Vector{Bool}, Matrix{Float32}}})
    @ ReinforcementLearningZoo ~/.julia/packages/ReinforcementLearningZoo/tvfq9/src/algorithms/dqns/basic_dqn.jl:78
  [2] update!(learner::BasicDQNLearner{NeuralNetworkApproximator{Chain{Tuple{Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Adam}, typeof(huber_loss), StableRNGs.LehmerRNG}, traj::CircularArraySARTTrajectory{NamedTuple{(:state, :action, :reward, :terminal), Tuple{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}}}})
    @ ReinforcementLearningZoo ~/.julia/packages/ReinforcementLearningZoo/tvfq9/src/algorithms/dqns/basic_dqn.jl:65
  [3] update!
    @ ~/.julia/packages/ReinforcementLearningCore/yeRLW/src/policies/q_based_policies/learners/abstract_learner.jl:35 [inlined]
  [4] update!
    @ ~/.julia/packages/ReinforcementLearningCore/yeRLW/src/policies/q_based_policies/q_based_policy.jl:67 [inlined]
  [5] (::Agent{QBasedPolicy{BasicDQNLearner{NeuralNetworkApproximator{Chain{Tuple{Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Adam}, typeof(huber_loss), StableRNGs.LehmerRNG}, EpsilonGreedyExplorer{:exp, false, StableRNGs.LehmerRNG}}, CircularArraySARTTrajectory{NamedTuple{(:state, :action, :reward, :terminal), Tuple{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}}}}})(stage::PreActStage, env::CartPoleEnv{Base.OneTo{Int64}, Float32, Int64, StableRNGs.LehmerRNG}, action::Int64)
    @ ReinforcementLearningCore ~/.julia/packages/ReinforcementLearningCore/yeRLW/src/policies/agents/agent.jl:78
  [6] _run(policy::Agent{QBasedPolicy{BasicDQNLearner{NeuralNetworkApproximator{Chain{Tuple{Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Adam}, typeof(huber_loss), StableRNGs.LehmerRNG}, EpsilonGreedyExplorer{:exp, false, StableRNGs.LehmerRNG}}, CircularArraySARTTrajectory{NamedTuple{(:state, :action, :reward, :terminal), Tuple{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}}}}}, env::CartPoleEnv{Base.OneTo{Int64}, Float32, Int64, StableRNGs.LehmerRNG}, stop_condition::StopAfterStep{ProgressMeter.Progress}, hook::TotalRewardPerEpisode)
    @ ReinforcementLearningCore ~/.julia/packages/ReinforcementLearningCore/yeRLW/src/core/run.jl:29
  [7] run(policy::Agent{QBasedPolicy{BasicDQNLearner{NeuralNetworkApproximator{Chain{Tuple{Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Adam}, typeof(huber_loss), StableRNGs.LehmerRNG}, EpsilonGreedyExplorer{:exp, false, StableRNGs.LehmerRNG}}, CircularArraySARTTrajectory{NamedTuple{(:state, :action, :reward, :terminal), Tuple{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}}}}}, env::CartPoleEnv{Base.OneTo{Int64}, Float32, Int64, StableRNGs.LehmerRNG}, stop_condition::StopAfterStep{ProgressMeter.Progress}, hook::TotalRewardPerEpisode)
    @ ReinforcementLearningCore ~/.julia/packages/ReinforcementLearningCore/yeRLW/src/core/run.jl:10
  [8] run(x::Experiment; describe::Bool)
    @ ReinforcementLearningCore ~/.julia/packages/ReinforcementLearningCore/yeRLW/src/core/experiment.jl:56
  [9] run(x::Experiment)
    @ ReinforcementLearningCore ~/.julia/packages/ReinforcementLearningCore/yeRLW/src/core/experiment.jl:54
 [10] top-level scope
    @ ~/Documents/julia/reinforcement/test.jl:9
 [11] include(fname::String)
    @ Base.MainInclude ./client.jl:476
 [12] top-level scope
    @ REPL[6]:1
 [13] top-level scope
    @ ~/.julia/packages/CUDA/DfvRa/src/initialization.jl:52
in expression starting at /home/std/Documents/julia/reinforcement/test.jl:9

So what else has to be defined so that `params` is found and the experiments run?

Thank you!

  • The most likely cause of UndefVarError: params is that this was written for an old version of Flux, which exported the symbol. Adding `using Flux: params` ought to fix it. – mcabbott Dec 04 '22 at 19:51
  • Thank you. This worked, but the next error is: "LoadError: UndefVarError: Dense not defined" (at line 18). – Mike75 Dec 04 '22 at 20:05
  • You probably changed the existing `using Flux` line to what mcabbott said; instead, add it as a separate line, i.e. you need both `using Flux` and `using Flux: params`. – Sundar R Dec 05 '22 at 03:37
  • You're right, I did replace `using Flux` instead of adding it. Now I tried both variants: `using Flux` followed by `using Flux: params`, and `using Flux: params` followed by `using Flux`, but the first error "ERROR: LoadError: UndefVarError: params not defined" comes back in both cases. – Mike75 Dec 05 '22 at 05:48
  • One thing I found out: if I start the script three times, then (1) with the `using Flux` line commented out, the Dense error occurs; (2) with `using Flux` NOT commented out, the params error occurs; and (3) with `using Flux` commented out again, the params error occurs once more. (Julia runs in an Ubuntu console via `include("test.jl")`.) The first time the include command runs it takes much longer and shows a progress bar at the beginning, which is not shown again in the second and later runs. – Mike75 Dec 05 '22 at 06:05
  • In neural_network_approximator.jl (as an example) Flux.params is mentioned. – Mike75 Dec 06 '22 at 07:39
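
For reference, a minimal sketch of the import block the comments converge on; whether it actually resolves the error depends on the installed Flux and ReinforcementLearning versions:

using ReinforcementLearning
using StableRNGs
using Flux           # brings Chain, Dense, relu, gpu, ... into scope
using Flux: params   # explicitly import `params`, which newer Flux versions no longer export
using Flux.Losses    # huber_loss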

1 Answer


I guess I found the answer here: https://juliareinforcementlearning.org/ (in "Get started in 3 lines!").

So the first step to run the demo is:

]add ReinforcementLearningExperiments

and then add

using ReinforcementLearningExperiments

as the first line of the file JuliaRL_BasicDQN_CartPole.jl.
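
If you prefer not to use the Pkg REPL mode, the same installation can also be done from a script. A minimal sketch, using only the standard Pkg library:

import Pkg
Pkg.add("ReinforcementLearningExperiments")   # same effect as `]add ReinforcementLearningExperiments`

using ReinforcementLearningExperiments        # then load the package as usual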

The second step is to look at the file ReinforcementLearningExperiments.jl in .julia/packages/ReinforcementLearningExperiments/dWZym/src.

Its imports/using block looks like this:

using ReinforcementLearning
using Requires
using StableRNGs
using Flux
using Flux.Losses
using Setfield
using Dates
using TensorBoardLogger
using Logging
using Distributions
using IntervalSets
using BSON

A short form of this is enough for the DQN demo. So the corrected version of the first file, "JuliaRL_BasicDQN_CartPole.jl", now looks like this:

using ReinforcementLearningExperiments
using ReinforcementLearning
using StableRNGs
using Flux
using Flux.Losses
using Dates
using Logging


function RL.Experiment(
    ::Val{:JuliaRL},
    ::Val{:BasicDQN},
    ::Val{:CartPole},
    ::Nothing;
    seed = 123,
)
    rng = StableRNG(seed)
    env = CartPoleEnv(; T = Float32, rng = rng)
    ns, na = length(state(env)), length(action_space(env))

    policy = Agent(
        policy = QBasedPolicy(
            learner = BasicDQNLearner(
                approximator = NeuralNetworkApproximator(
                    model = Chain(
                        Dense(ns, 128, relu; init = glorot_uniform(rng)),
                        Dense(128, 128, relu; init = glorot_uniform(rng)),
                        Dense(128, na; init = glorot_uniform(rng)),
                    ) |> gpu,
                    optimizer = ADAM(),
                ),
                batch_size = 32,
                min_replay_history = 100,
                loss_func = huber_loss,
                rng = rng,
            ),
            explorer = EpsilonGreedyExplorer(
                kind = :exp,
                ϵ_stable = 0.01,
                decay_steps = 500,
                rng = rng,
            ),
        ),
        trajectory = CircularArraySARTTrajectory(
            capacity = 1000,
            state = Vector{Float32} => (ns,),
        ),
    )
    stop_condition = StopAfterStep(10_000, is_show_progress=!haskey(ENV, "CI"))
    hook = TotalRewardPerEpisode()
    Experiment(policy, env, stop_condition, hook, "# BasicDQN <-> CartPole")
end

With these modifications the experiment runs the simulation.
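
For completeness, the second file ("test.jl") can stay essentially as in the question. Below is a minimal sketch of the runner, assuming the Plots package is installed (I use the gr() backend here; pyplot() works as well if PyPlot is set up). As far as I can tell, the E`...` backtick syntax asked about above is a non-standard string macro from ReinforcementLearning.jl that parses the name and dispatches to the matching RL.Experiment method, i.e. the one defined in the file above:

# test.jl — runs the experiment defined above and plots the episode rewards
include("JuliaRL_BasicDQN_CartPole.jl")

using Plots
gr()   # or pyplot(), if the PyPlot backend is installed

ex = E`JuliaRL_BasicDQN_CartPole`   # builds the Experiment via the RL.Experiment method above
run(ex)

plot(ex.hook.rewards, xlabel = "episode", ylabel = "total reward", legend = false)
savefig("JuliaRL_BasicDQN_CartPole.png")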


ANNOTATION: I just found a detailed description of all aspects of reinforcement learning and of the Julia reinforcement learning experiments in these notebooks:

https://github.com/JuliaReinforcementLearning/ReinforcementLearningAnIntroduction.jl/tree/master/notebooks

The notebooks run fine with my Julia version 1.8.2 and contain a lot of explanatory text, which clarifies many questions about how to start experiments and use environments.
