i am trying to implement an reinforcement agent, which decides on choosing machines for orders on a production site. I created a custom env in Simpy which asks the Agent on different times, what machines he wants to choose for specific orders, unfortunately i cannot figure out how to implement, that the environment has the leading role: the agent should only do something when he is asked for his action.
My Environment works like this: In the reset- function the Simpy simulation gets started, for every first operation for every order in an orderbook, the agent is asked to select the machine (def initProcesses(), def step()). After each order is finished(def OnTimeoutComplete()) the next operation should be scheduled by the agent(again def Step() is activated). Also new orders are created simultaneously, which should immediately get scheduled.
For implementation at the end i create a gym-env instance of the class and activate the reset function, which is supposed to trigger the Simpy- simulation.
I get the Error: "TypeError: Step() missing 1 required positional argument: 'action'", which i suppose is from my incorrect implementation of the step- function.
Is it possible to implement my env, the way i described or do i have to remodel it, by involving everything in the step- func and let it get activated by the agent? I would very much appreciate your help as this is my first project involving RL and i am keen to get more understanding. Thank you!
#Import Libraries
#common Libraries
import random
import simpy
import numpy
import queue
#gym Libraries
import gym
from gym import spaces
from gym.spaces import Discrete, Box, Dict
#sb3 Libraries
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_checker import check_env
class FJSP(gym.Env):
#Initialize action- and statespace
def __init__(self,nMachinesPerOperation):
super(FJSP, self).__init__()
self.action_space = spaces.Discrete(low = 0, high = max(nMachinesPerOperation), shape = (1,))
self.observation_space = spaces.Dict(
spaces = {
"AuftragVariante": spaces.Discrete(4),
##"AuftragOperationstyp": spaces.Discrete(3),
##"AuftragOperationszeit": spaces.Box(0,1,shape=(1,),dtype=numpy.float32),
##"AuftragSlackzeit": spaces.Box(0,1,shape=(1,),dtype=numpy.float32),
##"AuftragFortschritt": spaces.Box(0,1,shape=(1,),dtype=numpy.float32),
##"AuftragErwartungVerspätung": spaces.Discrete(1),
##"SystemVerweildauer": spaces.Box(0,1,shape=(1,),dtype=numpy.float32),
##"StandardabweichungPufferauslastung": spaces.Box(0,1,shape=(1,),dtype=numpy.float32),
##"MaschineMedianPufferauslastung": spaces.Box(0,1,shape=(1,),dtype=numpy.float32),
##"SystemMedianPufferauslastung": spaces.Box(0,1,shape=(1,),dtype=numpy.float32)
})
self.state = 0
#CreateOrders Functions
#Initializes by starting a process
def CreateOrdersInit(self,env):
self.env.process(self.yieldCreateOrdersEvents(env))
yield self.env.timeout(1) #needed for beeing a Generator function
#Yield Timeouts and set callback function for when timeout is done
def yieldCreateOrdersEvents(self,env):
self.TimeoutCreateOrders=self.env.timeout(random.randint(self.TimespanCreateorders*0.8,self.TimespanCreateorders*1.2))
self.TimeoutCreateOrders.callbacks.append(lambda event: self.AtCreateOrdersEvents(env))
yield self.env.timeout(1) #needed for beeing a Generator function
#After timeout finished order with random variant will be created
def AtCreateOrdersEvents(self,env):
self.VariantChoice = random.randint(0, len(self.Variants)-1)
for a in range(self.nOrders):
if self.Orderbook[a][0][2] == self.nOperationsPerVariant[self.Orderbook[a][0][1]]:
self.Orderbook[a][0][0] = env.now+258*self.nOrders*self.StrictnessDeadline
self.Orderbook[a][0][1] = self.VariantChoice
self.Orderbook[a][0][2] = 0 #Counter for Operations in this order
for b in range(1, self.nOperationsPerVariant[self.VariantChoice]+1):
self.Orderbook[a][b][0] = self.Variants[self.VariantChoice][b-1][0]
self.Orderbook[a][b][1] = self.Variants[self.VariantChoice][b-1][1]
self.Orderbook[a][b][2] = 1
print("Creating new Order:",self.Orderbook[a],"in Line:",a,"at Time",self.env.now)
print("Starting new order: Order,Operation:",a,0)
self.env.process(self.step(env,a,1))
break
else:
print("no slots free at time ", self.env.now)
self.env.process(self.yieldCreateOrdersEvents(env))
#Main Functions
#Initialize Processes for each order at time 0
def initProcesses(self,env):
for i in range(self.nOrders):
self.env.process(self.step(env,i,1))
yield self.env.timeout(1) #needed for beeing a Generator function
#Step- function
#Choosing Resource
def step(self,env,i,j,action):
if env.now > 7000:#self.episodeLength:
self.done = True
self.reset()
#Action will be inserted instead of next line
[j][0])]) #chooses random Resource out of the needed Resourcegroup
#simple action masking
if self.Orderbook[i][j][0] == 0 or self.Orderbook[i][j][0] == 2:
if action == 4:
action = 0
if action == 5:
action = 1
if self.Orderbook[i][j][0] == 2:
if action == 2:
action = 0
elif action == 3:
action = 1
#Putting order in queues
self.OrderQueue[(self.Orderbook[i][j][0])][action].put(i)
self.OperationQueue[(self.Orderbook[i][j][0])][action].put(j)
self.QueueCounter[(self.Orderbook[i][j][0])][action] = self.OrderQueue[(self.Orderbook[i][j][0])][action].qsize()
observation = self.getObservation(i,j)
reward = self.getReward(observation)
print((self.Orderbook[i][j][0]),action,"Put in Queue: (Group,Resource),Order,Operation:",i,",",j,"Resource Queue Length:",self.QueueCounter[(self.Orderbook[i][j][0])][action])
#if queue is empty start the operation
if self.ResourcesStatus[(self.Orderbook[i][j][0])][action] == 0:
NextOrder = self.OrderQueue[(self.Orderbook[i][j][0])][action].get()
NextOperation = self.OperationQueue[(self.Orderbook[i][j][0])][action].get()
self.yieldTimeoutEvents(env,NextOrder,NextOperation,action)
yield self.env.timeout(1)
return observation, reward, self.done
#create timeouts
def yieldTimeoutEvents(self,env,i,j,action):
print((self.Orderbook[i][j][0]),action,"Get out of Queue: (Group,Resource),Order,Operation:",i,",",j,"CurrentTime:",env.now,"Timeoutlength:", Orderbook[i][j][1])
self.ResourcesStatus[(self.Orderbook[i][j][0])][action] = 1
#Set Timeout
self.TimeoutEvents.append(simpy.events.Timeout(env,self.Orderbook[i][j][1]))
#Callback for when timeout is over
self.TimeoutEvents[len(self.TimeoutEvents) - 1].callbacks.append(lambda event: self.onTimeoutComplete(env,i,j,action))
#when timeout complete
def onTimeoutComplete(self,env,i,j,action):
self.Orderbook[i][0][2]+= 1 #Operationcounter +=1
self.Orderbook[i][j][2]= 0 #Operation is beeing fullfilled
print(self.Orderbook[i])
#if queue not empty get next operation an yield timeout
if not self.OrderQueue[(self.Orderbook[i][j][0])][action].empty():
OnTimeoutCompleteNextOrder = self.OrderQueue[(self.Orderbook[i][j][0])][action].get()
OnTimeoutCompleteNextOperation = self.OperationQueue[(self.Orderbook[i][j][0])][action].get()
self.yieldTimeoutEvents(env,OnTimeoutCompleteNextOrder,OnTimeoutCompleteNextOperation,action)
#else set the status to not active
else:
print((self.Orderbook[i][j][0]),action,"Finished:(Group,Resource),FinishTime: ",self.env.now)
self.ResourcesStatus[(self.Orderbook[i][j][0])][action] = 0
#choose resource for next operation in the orders operation sequence
if j < self.nOperationsPerVariant[(self.Orderbook[i][0][1])]+1:
self.env.process(self.step(env,i,j+1))
#Resets/Initializes the whole digital Twin and starts simulation
def reset(self):
#Setting of Parameters for Resources
#4x Pick & Place (nMachinesPerOperation[0])
#6x Schrauben (nMachinesPerOperation[1])
#2x Qualitätskontrolle + Verpackung (nMachinesPerOperation[2])
self.nMachinesPerOperation = [4, 6, 2]
#Buffersize shall be 5-10 to realistically portray the Workshop
self.BufferSize = 10
#Strictness of how tight the deadine ist(1=very strict; 1,4=not so strict)
self.StrictnessDeadline=1.2
#Setting of Parameters for Orders
self.nOrders = 20 #Count of Orders in Orderbook created at initialization/ maximum possible count
self.TimespanCreateorders = 250 #Timespan after which to create new orders
#Building of Variants
#Line 0-3: possible variants
#Row 0-6: strictly to be followed sequence of necessary operations
#For each Operation:["Resourcegroud needed for Operation", "Required Time for Operation"]
self.Variants = [[[0, 69], [1, 75], [0, 15], [1, 37], [2, 35], [0, 0], [0, 0]],
[[0, 81], [1, 37], [0, 15], [1, 75], [0, 15], [1, 37], [2, 35]],
[[0, 58], [1, 75], [0, 15], [1, 37], [2, 35], [0, 0], [0, 0]],
[[0, 70], [1, 37], [0, 15], [1, 75], [0, 15], [1, 37], [2, 35]]]
#Numbers of Operations necessary for each variant/ length of each line
#(just because of implementation issues later on all lines have same dimensionality,)
#(Operations with [0, 0] shall be ignored)
#(if Variants.shape[1] != len(nMachinesPerOperation): FAult!)
self.nOperationsPerVariant = [5, 7, 5, 7]
self.lenVariants = 7
#Build Orderbook
#[[[Deadline, Variant, Counter],[Resourcegroup, Time, Status],[Resourcegroup, Time, Status],...]
# [[Deadline, Variant, Counter],[Resourcegroup, Time, Status],[Resourcegroup, Time, Status],...
# [[Deadline, Variant, Counter],[Resourcegroup, Time, Status],[Resourcegroup, Time, Status]]
self.Orderbook = [[[0 for _ in range(3)] for _ in range(self.lenVariants+ 1)] for _ in range(self.nOrders)]
for a in range(self.nOrders):
VariantChoice = random.randint(0, len(self.Variants)-1)
self.Orderbook[a][0][0] = 258*self.nOrders*self.StrictnessDeadline
self.Orderbook[a][0][1] = VariantChoice
#Orderbook[a][0][2] = 0 #Counter for Operations in this order
for b in range(1, self.nOperationsPerVariant[VariantChoice]+1):
self.Orderbook[a][b][0] = self.Variants[VariantChoice][b-1][0]
self.Orderbook[a][b][1] = self.Variants[VariantChoice][b-1][1]
self.Orderbook[a][b][2] = 1 #Status of Operation(1 = Not Done; 0 = Done)
#Status(0 = Idle, 1 = In Operation)
self.ResourcesStatus = []
for k in range(len(self.nMachinesPerOperation)):
self.ResourcesStatus.append([0 for l in range(self.nMachinesPerOperation[k])])
#Queues of pending Operations for each Resource
self.OrderQueue = [[queue.Queue() for j in range(self.nMachinesPerOperation[i])] for i in range(len(self.nMachinesPerOperation))]
self.OperationQueue = [[queue.Queue() for j in range(self.nMachinesPerOperation[i])] for i in range(len(self.nMachinesPerOperation))]
self.QueueCounter = [[[0 for j in range(self.nMachinesPerOperation[i])] for i in range(len(self.nMachinesPerOperation))]]
#Init of global variable for signaling line of new created order in orderbook
self.LineOfNewOrder = 0
#Array for Timeouts
self.TimeoutEvents= []
self.done = False
#Build Environment
#self.env = simpy.Environment()
#Start Simulation
self.env.process(self.initProcesses(self.env))
self.env.process(self.CreateOrdersInit(self.env))
#set runtime
self.env.run(until=7000)
#observation starts with first operation of first order
observation = self.getObservation(0,1)
return observation
def getObservation(self,i,j):
observation = {
"AuftragVariante": numpy.array([self.Orderbook[i][0][1]], dtype=int),
#"AuftragOperationstyp": numpy.array([self.Orderbook[i][j][0]], dtype=int),
#"AuftragOperationszeit": numpy.array([self.Orderbook[i][j][1]], dtype=float),
#"AuftragSlackzeit": numpy.array([self.Orderbook[i][0][0]- self.env.now], dtype=float),#NICHT SLACK,restliche Beabeitungszeit#-übrige Operationen dann passts
#"AuftragFortschritt":numpy.array([j/(self.nOperationsPerVariant[self.Orderbook[i][0][1]])], dtype=float),
#"AuftragErwartungVerspätung": getAuftragErwartungVerspätung,
#"SystemVerweildauer":
#"MaschineStandardabweichungPufferauslastung":
#"MaschineMedianPufferauslastung": numpy.array([numpy.median(self.QueueCounter[(self.Orderbook[i][j][0])])], dtype=float),
#"SystemMedianPufferauslastung":numpy.array([numpy.median(self.QueueCounter)], dtype=float)
}
return observation
def getReward(self,observation):
reward = -2 + numpy.exp(0.69 * observation["SystemMedianPufferauslastung"])
#print("reward:",reward)
return reward
#Build Environment
env = FJSP()
model = PPO('MlpPolicy',env=env)
model.learn(total_timesteps=100)
obs = env.reset()
for i in range(1000):
action, __state = model.predict(obs, deterministic=True)
obs, reward, done, info = env.step(action)
if done:
obs = env.reset()
I am very much looking forward to your answers