
I'm trying to implement a Reinforcement Learning snooker player, and I want to train it using ML-Agents in Unity. The thing is that I am not used to this library, and I am not sure how I should do it. When I try to train it, the agent does nothing. Could you check what I am doing wrong and how I can solve it? Thanks so much. This is the main code:


using System;
using Unity.MLAgents;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Actuators;
using UnityEngine;
using System.Collections;
using UnityEngine.UI;


public struct Player
{
    public int score;
    public bool isNPC;
}

public class Snooker2D : Agent
{
    public GameObject cueBall;
    public Transform stick;
    public Transform ball;
    public Transform selectionFx;
    public Slider slider;

    int currentPlayer = 1;
    int nPlayers = 2;
    Player[] players = new Player[2];
    bool follow = true;
    bool doubleShot = false;
    float ballRadius = 0;
    LayerMask layerMaskBalls = 1 << 9;
    LayerMask layerMaskBallsAndWalls = 1 << 9 | 1 << 10;
    LayerMask layerMaskWalls = 1 << 10;

    RaycastHit2D hit;
    float dist = 0;
    float minDist = 0;
    float maxDist = -3;
    float forceMultiplier = 2.5f;

    Vector3 whiteBallPos = new Vector3(-4.3111f, 0, 0);

    public override void OnEpisodeBegin()
    {
        // Reset the environment
        Application.LoadLevel(Application.loadedLevel);
    }

    public override void CollectObservations(VectorSensor sensor)
    {
        // Target and Agent positions
        sensor.AddObservation(ball.position);
        // sensor.AddObservation(stick.position);

        // All balls positions
        foreach (GameObject ball in GameObject.FindGameObjectsWithTag("Ball"))
        {
            sensor.AddObservation(ball.transform.position);
        }
    }

    public override void OnActionReceived(ActionBuffers actionBuffers)
    {
        // Actions, size = 3, the force vector
        var action = actionBuffers.ContinuousActions;
        Vector3 force = new Vector3(action[0], action[1], action[2]);

        // Apply the force vector to the game environment, for example by adding it to the velocity of the cue ball
        Shoot(force);
    }

    public override void Heuristic(in ActionBuffers actionsOut)
    {
        var continuousActionsOut = actionsOut.ContinuousActions;

        // Set the force vector using keyboard or gamepad inputs, each action is between -1 and 1
        // Each action is an axis of the force vector
        continuousActionsOut[0] = Input.GetAxis("Horizontal");
        continuousActionsOut[1] = Input.GetAxis("Vertical");
        continuousActionsOut[2] = Input.GetAxis("Jump");
    }


    void Start()
    {
        // Player index 1 is the NPC (the ML agent)
        players[1].isNPC = true;

        // Derive the stick's pull-back limits from the ball radius
        ballRadius = ball.GetComponent<CircleCollider2D>().radius;
        minDist = -(ballRadius + ballRadius / 2);
        dist = Mathf.Clamp(maxDist / 2, maxDist, minDist);

        // Map the (negative) stick distance onto the power slider
        slider.maxValue = -maxDist;
        slider.minValue = -minDist;
        slider.value = dist + -maxDist - minDist;

        selectionFx.GetComponent<Fader>().StartFade();
    }

    void Update()
    {
        if (Input.GetKeyDown("r")) Application.LoadLevel(Application.loadedLevel);

        // PLAYER TURN
        if (follow)
        {
            Vector3 forceDir = Vector3.zero;
            // Analog Player
            if (!players[currentPlayer].isNPC)
            {
                // MOUSE: Get mouse position
                Vector3 mPos = Camera.main.ScreenToWorldPoint(new Vector3(Input.mousePosition.x, Input.mousePosition.y, 10));
                PowerAdjust();
                RotateStickAroundBall(mPos);
                ProjectTrajectory();
                forceDir = (ball.position - stick.position).normalized * -(dist - minDist - 0.02f) * forceMultiplier;
                if (Input.GetMouseButtonUp(0)) Shoot(forceDir); // && hit.collider != null)
            }
            // AI Player
            else
            {
                // TODO: Here will be the AI
                // Reward the model if there are less balls on the table
                // Apply the action to the ball
                //forceDir = new Vector3(1, 1, 1);
                //Shoot(forceDir);
                var continuousActionsOut = new ActionBuffers();
                Heuristic(continuousActionsOut);
                SetReward(1.0f / GameObject.FindGameObjectsWithTag("Ball").Length);
                if (GameObject.FindGameObjectsWithTag("Ball").Length == 1)
                {
                    // End the episode if there is only one ball left
                    EndEpisode();
                }
            }
        }

        // start following
        if (!follow)
        {
            if (AllBallsStopped() && ball.GetComponent<Rigidbody2D>().velocity.sqrMagnitude == 0.0f)
            {
                // Update player score
                // players[currentPlayer].score = UpdateScore(currentPlayer);
                // currentPlayer = (currentPlayer + 1) % nPlayers;
                follow = true;
                if (cueBall.activeSelf == false)
                {
                    SetReward(-0.1f);
                    RespawnBall();
                }
                selectionFx.GetComponent<Fader>().StartFade();
                Invoke("HideShowStick", 0.2f);
            }
        }

    } // update()
}

I tried reading about this, but I couldn't find much information.

  • Hm, I also don't have much experience, but one guess would be: bear in mind that the actions are taken in very rapid frames... you are calling `Shoot` each time, adding a force to the ball... now, the nature of ml-agents in general is that they basically start by randomly applying actions until they learn that some of them result in better rewards... so your ball gets a new random direction each frame and will therefore only barely jitter randomly around its original position -> before anything meaningful can happen, your episode is probably over – derHugo Dec 15 '22 at 20:01
  • => you would rather need to apply one shot -> simulate the entire physics until the turn is over, and only then check for rewards and continue with the next shot... only then can you validly apply rewards for the results of each shot (see the first sketch below) – derHugo Dec 15 '22 at 20:01
  • @derHugo I've been playing with the code a little bit more, and it seems that the main script (this one) is not running. I thought that the `Agent` class would be executed when running the game, as it is a child of `MonoBehaviour`. However, it doesn't execute. Any idea about this? – Pablo Olivares Dec 15 '22 at 22:48
  • All I can do then is ask the normal setup questions: is this attached to an active object in your scene? Have you done the rest of the ml-agents configuration? Are you sure the `BehaviourParameters -> Behaviour Name` matches the one from your config file? In general, do the settings in `BehaviourParameters` match your requirements? And if this is about reinforcement training - does your [reinforcement training](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Getting-Started.md#training-a-new-model-with-reinforcement-learning) actually run? (see the second sketch below) – derHugo Dec 16 '22 at 06:40
  • It seems the reason it doesn't work is the `OnEpisodeBegin()` method. When I delete it, the game runs. Why could this be? – Pablo Olivares Dec 16 '22 at 09:54
  • What does `Application.LoadLevel` do? If this happens to e.g. reload the scene, then this agent is destroyed and a new one is loaded, so the connection might get lost... (see the third sketch below) – derHugo Dec 16 '22 at 11:45
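
To make the suggestion in the first two comments concrete, here is a minimal sketch (my own, not a verified fix): remove any `DecisionRequester` component from the agent and request exactly one decision per NPC turn, only once the table is at rest. These would be members of the `Snooker2D` class; `waitingForShot` and `OnTurnFinished` are hypothetical names introduced for illustration, while `Shoot()` and `AllBallsStopped()` are assumed to exist as in the question's code:

    // Sketch: members of Snooker2D. Assumes no DecisionRequester component
    // is attached, so decisions only happen when RequestDecision() is called.
    bool waitingForShot = false; // hypothetical flag: a decision has been requested

    void FixedUpdate()
    {
        // Ask the policy for exactly one action per NPC turn,
        // and only once the previous shot has fully played out
        if (players[currentPlayer].isNPC && follow && !waitingForShot)
        {
            waitingForShot = true;
            RequestDecision(); // triggers CollectObservations + OnActionReceived
        }
    }

    // Revised version of the question's OnActionReceived
    public override void OnActionReceived(ActionBuffers actionBuffers)
    {
        var action = actionBuffers.ContinuousActions;
        Shoot(new Vector3(action[0], action[1], action[2]));
        follow = false; // let the physics play out before the next decision
    }

    // Hypothetical helper: call it from Update() once AllBallsStopped() is true
    void OnTurnFinished()
    {
        // Reward having fewer balls left on the table
        SetReward(1.0f / GameObject.FindGameObjectsWithTag("Ball").Length);
        if (GameObject.FindGameObjectsWithTag("Ball").Length == 1) EndEpisode();
        waitingForShot = false;
        follow = true;
    }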
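
On the setup questions: one way to check the wiring is to log at runtime what the `BehaviorParameters` component actually contains and compare it with the trainer config file. This is only a diagnostic sketch; the property names below are from recent ML-Agents releases and may differ in older versions:

    using UnityEngine;
    using Unity.MLAgents.Policies;

    // Purely diagnostic: attach next to the Agent and compare the logged
    // values with the YAML config passed to mlagents-learn.
    public class BehaviorCheck : MonoBehaviour
    {
        void Start()
        {
            var bp = GetComponent<BehaviorParameters>();
            Debug.Log($"Behavior name: {bp.BehaviorName}"); // must match the key in the trainer config
            Debug.Log($"Behavior type: {bp.BehaviorType}"); // Default lets an external trainer take over
            Debug.Log($"Vector obs size: {bp.BrainParameters.VectorObservationSize}");
            Debug.Log($"Continuous actions: {bp.BrainParameters.ActionSpec.NumContinuousActions}"); // 3 in this setup
        }
    }

Note that `CollectObservations` in the question adds 3 floats for `ball.position` plus 3 more per tagged ball, so the vector observation size has to match that total; and since potting balls changes the ball count mid-game, a varying observation length is itself worth checking.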
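
And on the last comment: `Application.LoadLevel(Application.loadedLevel)` does reload the active scene, so the `Agent` is destroyed mid-episode, which would explain why the game only runs once `OnEpisodeBegin()` is removed. A hedged alternative is to reset the table in place instead of reloading; in this sketch, `allBalls` and `initialPositions` are helper fields introduced for illustration, cached in `Agent.Initialize()`:

    // Sketch: members of Snooker2D, replacing the scene-reloading OnEpisodeBegin
    Rigidbody2D[] allBalls;
    Vector3[] initialPositions;

    public override void Initialize()
    {
        // Cache every ball once, so episodes can reset without a scene reload
        var ballObjects = GameObject.FindGameObjectsWithTag("Ball");
        allBalls = new Rigidbody2D[ballObjects.Length];
        initialPositions = new Vector3[ballObjects.Length];
        for (int i = 0; i < ballObjects.Length; i++)
        {
            allBalls[i] = ballObjects[i].GetComponent<Rigidbody2D>();
            initialPositions[i] = ballObjects[i].transform.position;
        }
    }

    public override void OnEpisodeBegin()
    {
        // Put every ball back where it started and zero its velocity,
        // keeping the Agent (and its trainer connection) alive
        for (int i = 0; i < allBalls.Length; i++)
        {
            allBalls[i].gameObject.SetActive(true);
            allBalls[i].transform.position = initialPositions[i];
            allBalls[i].velocity = Vector2.zero;
            allBalls[i].angularVelocity = 0f;
        }
        cueBall.SetActive(true);
        cueBall.transform.position = whiteBallPos;
    }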

0 Answers