Trying to run the code from
https://juliareinforcementlearning.org/docs/How_to_write_a_customized_environment/
results in the following error in my environment:
LoadError: MethodError: no method matching StateTransformedEnv(::Type{LotteryEnv}, ::typeof(identity), ::typeof(identity))
Closest candidates are:
StateTransformedEnv(::E, ::P, ::M) where {P, M, E<:AbstractEnv} at ~/.julia/packages/ReinforcementLearningEnvironments/Pzsys/src/environments/wrappers/StateTransformedEnv.jl:4
StateTransformedEnv(::Any; state_mapping, state_space_mapping) at ~/.julia/packages/ReinforcementLearningEnvironments/Pzsys/src/environments/wrappers/StateTransformedEnv.jl:15
(A) My question is (of course): how can I solve this?
And (B), maybe (if you will, in addition to A): I already asked another question about defining reinforcement learning experiments, where I had problems loading the libraries needed for the experiments (How to start julia reinforcement experiments?), and I have also had problems using Flux and other libraries in combination (e.g. with this interesting library: https://github.com/JuliaReinforcementLearning/ReinforcementLearningAnIntroduction.jl).
Some of these problems can be resolved by adding additional using statements. But one step further: is there a way to define the using statements, or a package environment, so consistently or generally that errors like this are prevented before they occur? Or: how could I more easily figure out which libraries (or which versions of them) have to be added or combined when problems like this occur?
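To make clearer what I mean by an "environment" in (B), here is a minimal sketch of what I imagine, assuming a per-project Pkg environment is the right mechanism (the project name is just an example):
using Pkg
Pkg.activate("my_rl_project")   # creates/uses a Project.toml for this project
Pkg.add(["ReinforcementLearning", "Flux", "Plots"])
Pkg.status()                    # shows which package versions were resolved
# The resulting Project.toml/Manifest.toml pin the exact versions, so the
# same using-statements should then behave reproducibly.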
Thank you for any help with A (and B)!
Here is the code:
using ReinforcementLearning
Base.@kwdef mutable struct LotteryEnv <: AbstractEnv
    reward::Union{Nothing, Int} = nothing
end
RLBase.action_space(env::LotteryEnv) = (:PowerRich, :MegaHaul, nothing)
RLBase.reward(env::LotteryEnv) = env.reward
RLBase.state(env::LotteryEnv) = !isnothing(env.reward)
RLBase.state_space(env::LotteryEnv) = [false, true]
RLBase.is_terminated(env::LotteryEnv) = !isnothing(env.reward)
RLBase.reset!(env::LotteryEnv) = env.reward = nothing
function (x::LotteryEnv)(action)
    if action == :PowerRich
        x.reward = rand() < 0.01 ? 100_000_000 : -10
    elseif action == :MegaHaul
        x.reward = rand() < 0.05 ? 1_000_000 : -10
    elseif isnothing(action)
        x.reward = 0
    else
        @error "unknown action of $action"
    end
end
env = LotteryEnv()
#RLBase.test_runnable!(env)
n_episode = 1000
#for _ in 1:n_episode
# reset!(env)
# while !is_terminated(env)
# env |> action_space |> rand |> env
# end
#end
run(RandomPolicy(action_space(env)), env, StopAfterEpisode(n_episode))
hook = TotalRewardPerEpisode()
#run(RandomPolicy(action_space(env)), env, StopAfterEpisode(n_episode), hook)
#using Plots
#plot(hook.rewards)
using Flux: InvDecay
p = QBasedPolicy(
    learner = MonteCarloLearner(;
        approximator = TabularQApproximator(;
            n_state = length(state_space(env)),
            n_action = length(action_space(env)),
            opt = InvDecay(1.0)
        )
    ),
    explorer = EpsilonGreedyExplorer(0.1)
)
wrapped_env = ActionTransformedEnv(
    StateTransformedEnv(
        env;
        state_mapping = s -> s ? 1 : 2,
        state_space_mapping = _ -> Base.OneTo(2)
    );
    action_mapping = i -> action_space(env)[i],
    action_space_mapping = _ -> Base.OneTo(3),
)
LotteryEnv |> StateTransformedEnv |> ActionTransformedEnv   # (the MethodError shown above is raised on this line)
#typename(QBasedPolicy)