1

I'm coding some RL behaviors on a pacman bot and I messed things up with one of my lists in one of my functions arg_allmax or chooseAction

Here is the code of my class:

package rl;

import java.util.ArrayList;
import java.util.Hashtable;

public class Qlearn {
    private double epsilon = 0.1; // Epsilon parameter for the Epsilon Greedy Strategy 
    private double alpha = 0.2; // Alpha parameter: used to influence o the refresh of Q
    private double gamma = 0.9; // used to notice or not the feedback of the next action ; if =0 -> no feed back

private int actions[];
private Hashtable< Tuple<Integer,Integer>, Double> q; // Q(s,a) : hashTable : <state,action> -> value of q


public Qlearn(int[] actions) {
    this.actions = actions;
    q = new Hashtable< Tuple<Integer,Integer>, Double>();
}

public Qlearn(int[] actions, double epsilon, double alpha, double gamma) {
    this.actions = actions;
    this.epsilon = epsilon;
    this.alpha = alpha;
    this.gamma = gamma;
    q = new Hashtable< Tuple<Integer,Integer>, Double>();
}

public Double getQ(int id_state, int id_action) {
    // get the value of Q for the state of id_state and the action id_action ( return 0 if the value is not in the hashtable ) 
    Tuple<Integer,Integer> t = new Tuple<Integer,Integer> (id_state, id_action); // we creatte a new integer object Tubple with the value of id_state and id_action 
    Double v = q.get(t);
    if(v != null) return v;
    else return 0.0;
}

// get the argmax of a list
public int argmax(double[] list) {
    int arg=-1;
    double max= 0;
    for ( int i = 0; i<list.length; i++){
        if ( list[i]>max ){
            max = list[i];
            arg = i;
        }
    }
    return arg;
}

// get all the argmax if the argmax has several iterations
public ArrayList<Integer> arg_allmax(double[] list) {
    ArrayList<Integer> args = new ArrayList<Integer>();
    int a = argmax(list);
    for ( int i = 0; i< list.length; i++){
        if (list[i] == list[a]){
            args.add(i);
        }
    }
    return args;
}

// get the max of the list
public double max(double[] list) {
    double max_ = -1e20;
    int a = argmax(list);
    max_ = list[a];
    return max_;
}


/*
 * Fonction that updates the hashtable
 *      for the action  id_action and the state  id_state
 *      if Q(s,a) had an old value, we allocate it the new value+ alpha(value - old_value)
 *      if Q(s,a) had not an old value : we allocate reward
 */
public void learnQ(int id_state, int id_action, double reward, double value) {
    Tuple<Integer,Integer> t = new Tuple<Integer,Integer>(id_state,id_action);
    Double oldv = q.get(t);

    if(oldv == null) {

        q.put(t, reward);
    } else {

        q.put(t, oldv+alpha*(value-oldv));
    }
}

/*
 * Here is the Epsilon Greedy strategy
 *      with proba epsilon :we choose a random action
 *      avec proba 1-eps : we choose the most favorable action in fonction of  Q(s,a)
 */
public int chooseAction(int id_state) {
    int action = -1;
    if(Math.random() < epsilon) {

        int i = (int)(Math.random()*actions.length);
        action = actions[i];

    } else { 
        double[] tab = new double[actions.length];
        ArrayList<Integer> argmaxarray = new ArrayList<Integer>();
        for ( int i=0; i>actions.length; i++){
            tab[i]=actions[i];
        }
        argmaxarray=arg_allmax(tab);
        int i=(int)(Math.random()*argmaxarray.size());
        action=argmaxarray.get(i);

    }

    return action;
}


/*
 * Learning after the occurence of a move
 *      1) get the most profitable potential action from  Q(s',a)
 *      2) call learnQ
 */
public void learn(int id_state1, int id_action1, double reward, int id_state2) {
    int futureAction = 0;
    futureAction = chooseAction(id_state2);
    double maxqnew = 0; // REMPLIR  
    maxqnew = getQ(futureAction, id_state2);


    learnQ(id_state1, id_action1, reward, reward + gamma*maxqnew);

}

// Affiche Q(s,a)
private void printQvalue(int id_state) {
    for(int action : actions) {
        Tuple<Integer,Integer> t = new Tuple<Integer,Integer>(id_state,action);
        Double v = q.get(t);
        System.out.print(v+" ");
    }
    System.out.println();
}

Here is what eclipse tells me :

Exception in thread "AWT-EventQueue-0" java.lang.ArrayIndexOutOfBoundsException: -1
    at rl.Qlearn.arg_allmax(Qlearn.java:54)
    at rl.Qlearn.chooseAction(Qlearn.java:108)
    at rl.Qlearn.learn(Qlearn.java:138)

I think it comes somewhere in the else of the chooseAction method using the all_argmax fonction but I cannot find the exact error!

Here are the two involved methods (so it's more readable for you):

all_argmax :

public ArrayList<Integer> arg_allmax(double[] list) {
    ArrayList<Integer> args = new ArrayList<Integer>();
    int a = argmax(list);
    for ( int i = 0; i< list.length; i++){
        if (list[i] == list[a]){
            args.add(i);
        }
    }
    return args;
}

chooseAction :

public int chooseAction(int id_state) {
    int action = -1;
    if(Math.random() < epsilon) {

        int i = (int)(Math.random()*actions.length);
        action = actions[i];

    } else { 
        double[] tab = new double[actions.length];
        ArrayList<Integer> argmaxarray = new ArrayList<Integer>();
        for ( int i=0; i>actions.length; i++){
            tab[i]=actions[i];
        }
        argmaxarray=arg_allmax(tab);
        int i=(int)(Math.random()*argmaxarray.size());
        action=argmaxarray.get(i);

    }

    return action;
}
halfer
  • 19,824
  • 17
  • 99
  • 186
drheinrich940
  • 143
  • 1
  • 13
  • 1
    It would help if you'd: a) translate all your comments into English; b) format your code; c) follow Java naming conventions in sample code; d) reduce the question to a [mcve] (both minimal and complete, neither of which is the case at the moment). – Jon Skeet Nov 08 '16 at 13:08
  • 1
    I've now removed the "first post on stack overflow, thanks for reading my little question. I'm facing an issue on my code that i cant resolve" part twice. This is irrelevant to the question - and worse, as it's at the start of the question, that's what shows up on the main question page. – Jon Skeet Nov 08 '16 at 13:13
  • ok Jon Skeet i'll try to do what you asked me, sorry if the way i structured my post was clumsy. – drheinrich940 Nov 08 '16 at 13:16
  • in `arg_allmax()`: `int a = argmax(list);` might be returning -1. Have you checked that `list` definitely isn't empty? – d.j.brown Nov 08 '16 at 13:17
  • How are you creating the `QLearn` object? All I can see happening is that `actions` is probably an empty array (or all negatives), but we can't know for sure – jonhopkins Nov 08 '16 at 13:18

1 Answers1

3

your IndexOutOfBoundsException occurs because of your argmax([]) method, either because of an empty array or because all the doubles in the list are negative.

In either of these cases the int arg = -1 variable is never set to another value than -1, which is obviously out of bounds in any scenario since -1 is not a valid array position.

The best course of action would be to either check if your array is empty before passing it to argmax or checking if the return value is valid (not -1) before doing anything with it. And also changing double max = 0 to double max = Double.NEGATIVE_INFINITY

Gelunox
  • 772
  • 1
  • 5
  • 23
  • 1
    `Double.MIN_VALUE` will result in unexpected behaviour (see http://stackoverflow.com/questions/3884793/why-is-double-min-value-in-not-negative), I'd use `Double.NEGATIVE_INFINITY` for this use case. – d.j.brown Nov 08 '16 at 13:23
  • 1
    @d.j.brown coincidentally I was reading the same answer just before your comment. Good feedback tho, I've updated my answer – Gelunox Nov 08 '16 at 13:27
  • @Gelunox thank you, i just changed double max as you showed and added a little allert message in case of empty list and things are working again now ! – drheinrich940 Nov 08 '16 at 13:50