Skip to content

Commit

Permalink
"Updated game environment attributes for improved AI training
Browse files Browse the repository at this point in the history
Expanded the Snake Game agent's vector observation from 5 to 30 values so that it sees a more detailed view of the environment. Changed the agent's decision period to tune how often it makes decisions. Altered the positions, behavior type, active state, and anchor points of the game objects in the game levels to provide a better training environment. These changes are intended to improve the performance and efficiency of the AI training process."
  • Loading branch information
Pristar4 committed Jul 11, 2023
1 parent d17612f commit 427792e
Show file tree
Hide file tree
Showing 6 changed files with 174 additions and 131 deletions.
2 changes: 1 addition & 1 deletion Assets/ML-Agents/Timers/AI_timers.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"count":1,"self":312.13376,"total":578.49973879999993,"children":{"InitializeActuators":{"count":16,"self":0.0010077999999999999,"total":0.0010077999999999999,"children":null},"InitializeSensors":{"count":16,"self":0,"total":0,"children":null},"AgentSendState":{"count":31843,"self":0.5999386,"total":2.4476492999999997,"children":{"CollectObservations":{"count":238033,"self":0.3295816,"total":0.3295816,"children":null},"WriteActionMask":{"count":238033,"self":0.11404349999999999,"total":0.11404349999999999,"children":null},"RequestDecision":{"count":238033,"self":0.3964431,"total":1.4040856,"children":{"AgentInfo.ToProto":{"count":237905,"self":0.37333649999999996,"total":1.0076425,"children":{"GenerateSensorData":{"count":237905,"self":0.63430599999999993,"total":0.63430599999999993,"children":null}}}}}}},"DecideAction":{"count":31843,"self":255.17744639999998,"total":255.1774481,"children":null},"AgentAct":{"count":31843,"self":8.7121871999999989,"total":8.739863999999999,"children":{"AgentInfo.ToProto":{"count":4018,"self":0.0145954,"total":0.027676899999999997,"children":{"GenerateSensorData":{"count":4018,"self":0.0130815,"total":0.0130815,"children":null}}}}}},"gauges":{"SnakeAi.CumulativeReward":{"count":4042,"max":109.316093,"min":-10.4399977,"runningAverage":-2.30997682,"value":39.7010956,"weightedAverage":48.6735573}},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1689089519","unity_version":"2023.1.3f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2023.1.3f1\\Editor\\Unity.exe -projectpath C:\\Users\\felix\\SnakeGame -useHub -hubIPC -cloudEnvironment production","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.3.0-exp.3","scene_name":"AI","end_time_seconds":"1689090097"}}
{"count":1,"self":1900.9214464,"total":3646.702797,"children":{"InitializeActuators":{"count":20,"self":0.0020009,"total":0.0020009,"children":null},"InitializeSensors":{"count":20,"self":0.0025141,"total":0.0025141,"children":null},"AgentSendState":{"count":214702,"self":12.033138399999999,"total":59.1252904,"children":{"CollectObservations":{"count":1526209,"self":14.3286336,"total":14.328634099999999,"children":null},"WriteActionMask":{"count":1526209,"self":2.9685319999999997,"total":2.9685319,"children":null},"RequestDecision":{"count":1526209,"self":10.107839199999999,"total":29.7949861,"children":{"AgentInfo.ToProto":{"count":1516682,"self":7.8454624,"total":19.6871471,"children":{"GenerateSensorData":{"count":1516682,"self":11.841683999999999,"total":11.841684299999999,"children":null}}}}}}},"DecideAction":{"count":214702,"self":1617.9479552,"total":1617.9479155,"children":null},"AgentAct":{"count":214702,"self":68.6426944,"total":68.703616099999991,"children":{"AgentInfo.ToProto":{"count":3103,"self":0.0373284,"total":0.0609244,"children":{"GenerateSensorData":{"count":3103,"self":0.023596,"total":0.023596,"children":null}}}}}},"gauges":{"SnakeAi.CumulativeReward":{"count":3201,"max":356.897736,"min":-1060.9856,"runningAverage":117.872993,"value":-10.5,"weightedAverage":13.1152983}},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1689105069","unity_version":"2023.1.3f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2023.1.3f1\\Editor\\Unity.exe -projectpath C:\\Users\\felix\\SnakeGame -useHub -hubIPC -cloudEnvironment production","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.3.0-exp.3","scene_name":"AI","end_time_seconds":"1689108716"}}
Binary file modified Assets/SnakeAi.onnx
Binary file not shown.
2 changes: 1 addition & 1 deletion Assets/SnakeGame/Prefabs/SnakeAgent.prefab
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ MonoBehaviour:
m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
VectorObservationSize: 5
VectorObservationSize: 30
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
Expand Down
100 changes: 73 additions & 27 deletions Assets/SnakeGame/Scripts/SnakeAgent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ private static bool IsSnakeAlive(Snake snake) {


public override void CollectObservations(VectorSensor sensor) {
const int view = 5; // Define view size as per your needs

// Assuming that there is always at least one snake and one food in the game
var snake = board.Snakes[0];
var food = board.FoodPositions[0];
Expand All @@ -134,10 +136,10 @@ public override void CollectObservations(VectorSensor sensor) {
Vector2 directionToFood = (food - snake.Position);
// Normalize directionToFood
directionToFood = directionToFood.normalized;


// Add the direction to the food (2 floats)
sensor.AddObservation(directionToFood);

// Add the direction to the food (2 floats)
sensor.AddObservation(directionToFood);

// Check for immediate dangers: front, left, right relative to the snake's current direction
var forwardPosition = snake.Position + snake.Direction;
Expand All @@ -148,21 +150,51 @@ public override void CollectObservations(VectorSensor sensor) {
sensor.AddObservation(IsPositionSafe(forwardPosition));
sensor.AddObservation(IsPositionSafe(leftPosition));
sensor.AddObservation(IsPositionSafe(rightPosition));

// Assuming that there is always at least one snake in the game
var snakeHeadPosition = board.Snakes[0].Position;

// Position of the top left corner of the window
var windowStart = snakeHeadPosition - new Vector2Int(view / 2, view / 2);

// Encode the view x view window that starts at windowStart, one observation per cell:
// -1 = outside the board, 1 = snake, 2 = food, 0 = anything else.
for (int i = 0; i < view; i++) {
    for (int j = 0; j < view; j++) {
        var cellPos = windowStart + new Vector2Int(i, j);

        int cellValue;

        if (cellPos.x < 0 || cellPos.y < 0 || cellPos.x >= board.Width || cellPos.y >= board.Height) {
            // Cell is out of bounds; the board is not queried for it
            cellValue = -1;
        } else {
            // Look the tile up at the window cell's board coordinates. Using the raw
            // loop indices (i, j) here would always sample the same corner of the
            // board instead of the cells around the snake's head.
            var tile = board.GetTile(cellPos.x, cellPos.y);

            if (tile.Type == TileType.Snake) {
                // Cell is occupied by the snake
                cellValue = 1;
            } else if (tile.Type == TileType.Food) {
                // Cell contains food
                cellValue = 2;
            } else {
                // Cell is empty
                cellValue = 0;
            }
        }

        sensor.AddObservation(cellValue);
    }
}
}

private bool IsPositionSafe(Vector2Int position) {
// Define the condition for the position to be safe. The following is just a basic example.
// You might need to add more conditions or modify it according to your game rules.

// Check if out of bounds
if (position.x < 0 || position.y < 0 || position.x >= width || position.y >= height)
return false;

// Check if would collide with the snake
foreach (var snake in board.Snakes)
{
foreach (var snake in board.Snakes) {
if (snake.Body.Contains(position)) {

return false;
}
}
Expand All @@ -187,12 +219,11 @@ public override void OnActionReceived(ActionBuffers actions) {
}

var snake = board.Snakes[0];
int lengthAtTimeStep = snake.Length;
Vector2 currentFoodPosition, previousFoodPosition = Vector2.zero;
Vector2 FoodPosition, previousFoodPosition = Vector2.zero;

if (board.FoodPositions.Count > 0) {
currentFoodPosition = board.FoodPositions[0];
previousFoodPosition = currentFoodPosition;
FoodPosition = board.FoodPositions[0];
previousFoodPosition = FoodPosition;
}

switch (action) {
Expand Down Expand Up @@ -237,21 +268,35 @@ public override void OnActionReceived(ActionBuffers actions) {
scoreText.text = "Score: " + board.Snakes[0].Score;


var snakeNewPosition = board.Snakes[0].Position;
// Distance Reward
PreviousDistance = Vector2.Distance(snakeNewPosition, previousFoodPosition);
const int rewardRadius = 4;

if (board.FoodPositions.Count > 0) {
currentFoodPosition = board.FoodPositions[0];
// Calculate Euclidean distance
float currentDistance = Vector2.Distance(snakeNewPosition, currentFoodPosition);

// Define the reward
float reward = Mathf.Log((lengthAtTimeStep + PreviousDistance)
/ (lengthAtTimeStep + currentDistance));

// Apply the reward
// AddReward(reward);
var currentTilePos = snake.Position;
FoodPosition = board.FoodPositions[0];

//Euclidean distance to food from current position
float currentDistance = Vector2.Distance(currentTilePos, FoodPosition);
// Normalize the current distance, max distance will be the sqrt of (height^2 + width^2)
float normalizedCurrentDistance = currentDistance /
Mathf.Sqrt((board.Width * board.Width) +
(board.Height * board.Height));

if (normalizedCurrentDistance <= rewardRadius) {
// Compute the reward based on the Normalized current distance
// float reward = Mathf.Log((snake.Length + PreviousDistance) / (snake.Length + currentDistance));


// Define the reward
float reward = Mathf.Log((snake.Length + PreviousDistance)
/ (snake.Length + normalizedCurrentDistance));

// Debug.Log("Distance Reward:" + reward);

// Apply the reward
// AddReward(reward);
PreviousDistance = currentDistance;
}
}


Expand All @@ -261,7 +306,7 @@ public override void OnActionReceived(ActionBuffers actions) {
board.Snakes[0].AteFood = false;
}

AddReward(-0.001f);
AddReward(-0.1f);

if (!IsSnakeAlive(snake)) {
Debug.Log("Dead : " + -10f);
Expand All @@ -270,8 +315,9 @@ public override void OnActionReceived(ActionBuffers actions) {
}
}


public override void Heuristic(in ActionBuffers actionsOut) {
Debug.Log("Heuristic");
// Debug.Log("Heuristic");
Vector2Int currentDirection = board.Snakes[0].Direction;
int relativeDirection = GetRelativeDirection(currentDirection, _inputDirection);
var discreteActionsOut = actionsOut.DiscreteActions;
Expand All @@ -281,7 +327,7 @@ public override void Heuristic(in ActionBuffers actionsOut) {
private int GetRelativeDirection(Vector2Int currentDirection, Vector2Int inputDirection) {
var clockwiseDirection = RotateClockwise(currentDirection);
var counterClockwiseDirection = RotateCounterClockwise(currentDirection);




Expand All @@ -291,7 +337,7 @@ private int GetRelativeDirection(Vector2Int currentDirection, Vector2Int inputDi
return 1; // go straight
if (inputDirection == clockwiseDirection)
return 2; // turn right


return 1;
}
Expand Down
1 change: 1 addition & 0 deletions Assets/SnakeGame/Scripts/SnakeController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ public Snake[] CreateSnakes(int width, int height, int numberOfSnakes, int start

for (int i = 0; i < numberOfSnakes; i++) {
var startSpawnPosition = new Vector2Int(Random.Range(0, width), Random.Range(0, height));
// var startSpawnPosition = new Vector2Int(0, 0);
var startDirection = Vector2Int.up;
snakeArray[i] = CreateSnake(startSpawnPosition, startDirection, startSize, i);
}
Expand Down
Loading

0 comments on commit 427792e

Please sign in to comment.