final edit, finishing touches. Python perf test: SPS: 2,045,564.37560…

…13566 on mediocre hardware. C perf test: SPS: 333299.687500. added README. formatted w/ clang-tidy Google for succinctness and readability.
PufferAI · Jan 12, 2025 · fcba83c · fcba83c
1 parent 791bfe5
commit fcba83c
Show file tree

Hide file tree

Showing 6 changed files with 1,555 additions and 1,550 deletions.
diff --git a/pufferlib/ocean/enduro/README.md b/pufferlib/ocean/enduro/README.md
@@ -0,0 +1,44 @@
+# README
+Please star PufferLib on GitHub: it really makes a difference!
+https://github.com/pufferai/pufferlib
+
+by Daniel Addis, 2024
+https://github.com/xinpw8
+
+## Puffer Ocean Enduro
+This project contains a performant reinforcement-learning environment inspired by the classic Atari 2600 game Enduro. It uses C, Cython, and Python to provide an interactive RL environment, trainable with PufferLib.
+
+## Building & Setup
+1. Install dependencies. All commands should be run from the `PufferLib` top directory.
+```sh
+pip install -e .'[cleanrl]'
+```
+More detailed installation instructions are available at https://puffer.ai/docs.html
+
+2. The environment is compiled automatically when PufferLib is installed with pip. After making changes to `cy_enduro.pyx`, `enduro.c`, or `enduro.h`, recompile with:
+    ```sh
+    python setup.py build_ext --inplace
+    ```
+
+3. To locally compile the C environment for testing (without Cython), run:
+    ```sh
+    scripts/build_ocean.sh enduro local
+    ```
+    This builds a standalone `enduro` executable in the PufferLib top directory, which you can run with:
+    ```sh
+    ./enduro
+    ```
+    Hold Left Shift to take control from the agent: Left/Right (or A/D) steer, Space accelerates, and Down (or S) decelerates.
+
+## Training
+To train using the demo script with wandb logs, run:
+```sh
+python demo.py --env puffer_enduro --mode train --track
+```
+Model checkpoints are saved to the `experiments/` directory at the interval specified in `config/ocean/enduro.ini`.
+
+## Evaluation
+To evaluate a local checkpoint, run:
+```sh
+python demo.py --env puffer_enduro --mode eval --eval-model-path your_model.pt
+```
diff --git a/pufferlib/ocean/enduro/cy_enduro.pyx b/pufferlib/ocean/enduro/cy_enduro.pyx
@@ -1,8 +1,7 @@
 # cython: language_level=3
 cimport numpy as cnp
-from libc.stdlib cimport malloc, calloc, free
+from libc.stdlib cimport calloc, free
 from libc.string cimport memset
-from libc.time cimport time
 from random import SystemRandom
 
 rng = SystemRandom()
@@ -38,7 +37,6 @@ cdef extern from "enduro.h":
         unsigned char* truncateds
         LogBuffer* log_buffer
         size_t obs_size
-        int num_envs
 
     ctypedef struct GameState
     GameState* make_client(Enduro* env)
@@ -69,8 +67,6 @@ cdef class CyEnduro:
         self.envs = <Enduro*>calloc(num_envs, sizeof(Enduro))
         self.logs = allocate_logbuffer(LOG_BUFFER_SIZE)
 
-        from time import time as py_time
-
         for i in range(num_envs):
             unique_seed = rng.randint(0, 2**32 - 1) & 0x7FFFFFFF
             memset(&self.envs[i], 0, sizeof(Enduro))
@@ -114,4 +110,3 @@ cdef class CyEnduro:
     def log(self):
         cdef Log log = aggregate_and_clear(self.logs)
         return log
-
diff --git a/pufferlib/ocean/enduro/enduro.c b/pufferlib/ocean/enduro/enduro.c
@@ -1,97 +1,92 @@
+#include "enduro.h"
+
+#include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <stddef.h>
 #include <time.h>
-#include "enduro.h"
-#include "raylib.h"
-#include "puffernet.h"
 
-#define MAX_ENEMIES 10
+#include "puffernet.h"
+#include "raylib.h"
 
-void get_input(Enduro* env) {
-        if ((IsKeyDown(KEY_DOWN) && IsKeyDown(KEY_RIGHT)) || (IsKeyDown(KEY_S) && IsKeyDown(KEY_D))) {
-            env->actions[0] = ACTION_DOWNRIGHT; // Decelerate and move right
-        } else if ((IsKeyDown(KEY_DOWN) && IsKeyDown(KEY_LEFT)) || (IsKeyDown(KEY_S) && IsKeyDown(KEY_A))) {
-            env->actions[0] = ACTION_DOWNLEFT; // Decelerate and move left
-        } else if (IsKeyDown(KEY_SPACE) && (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D))) {
-            env->actions[0] = ACTION_RIGHTFIRE; // Accelerate and move right
-        } else if (IsKeyDown(KEY_SPACE) && (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A))) {
-            env->actions[0] = ACTION_LEFTFIRE; // Accelerate and move left   
-        } else if (IsKeyDown(KEY_SPACE)) {
-            env->actions[0] = ACTION_FIRE; // Accelerate
-        } else if (IsKeyDown(KEY_DOWN) || IsKeyDown(KEY_S)) {
-            env->actions[0] = ACTION_DOWN; // Decelerate
-        } else if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A)) {
-            env->actions[0] = ACTION_LEFT; // Move left
-        } else if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) {
-            env->actions[0] = ACTION_RIGHT; // Move right
-        } else {
-            env->actions[0] = ACTION_NOOP; // No action
-        }
+void get_input(Enduro *env) {
+  if ((IsKeyDown(KEY_DOWN) && IsKeyDown(KEY_RIGHT)) ||
+      (IsKeyDown(KEY_S) && IsKeyDown(KEY_D))) {
+    env->actions[0] = ACTION_DOWNRIGHT;  // Decelerate and move right
+  } else if ((IsKeyDown(KEY_DOWN) && IsKeyDown(KEY_LEFT)) ||
+             (IsKeyDown(KEY_S) && IsKeyDown(KEY_A))) {
+    env->actions[0] = ACTION_DOWNLEFT;  // Decelerate and move left
+  } else if (IsKeyDown(KEY_SPACE) &&
+             (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D))) {
+    env->actions[0] = ACTION_RIGHTFIRE;  // Accelerate and move right
+  } else if (IsKeyDown(KEY_SPACE) &&
+             (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A))) {
+    env->actions[0] = ACTION_LEFTFIRE;  // Accelerate and move left
+  } else if (IsKeyDown(KEY_SPACE)) {
+    env->actions[0] = ACTION_FIRE;  // Accelerate
+  } else if (IsKeyDown(KEY_DOWN) || IsKeyDown(KEY_S)) {
+    env->actions[0] = ACTION_DOWN;  // Decelerate
+  } else if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A)) {
+    env->actions[0] = ACTION_LEFT;  // Move left
+  } else if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) {
+    env->actions[0] = ACTION_RIGHT;  // Move right
+  } else {
+    env->actions[0] = ACTION_NOOP;  // No action
+  }
 }
 
 int demo() {
-    Weights* weights = load_weights("resources/enduro/0105enduro_weights.bin", 142218);
-    LinearLSTM* net = make_linearlstm(weights, 1, 68, 9);
-
-    Enduro env = {
-        .num_envs = 1,
-        .max_enemies = MAX_ENEMIES,
-        .obs_size = OBSERVATIONS_MAX_SIZE
-    };
-
-    allocate(&env);
-    GameState* client = make_client(&env);
-    unsigned int seed = 0;
-    init(&env, seed, 0);
-    reset(&env);
+  Weights *weights =
+      load_weights("resources/enduro/enduro_weights.bin", 142218);
+  LinearLSTM *net = make_linearlstm(weights, 1, 68, 9);
 
-    while (!WindowShouldClose()) {
-        if (IsKeyDown(KEY_LEFT_SHIFT)) {
-            get_input(&env);
-        } else {
-            forward_linearlstm(net, env.observations, env.actions);
-        }
+  Enduro env = {.obs_size = OBSERVATIONS_MAX_SIZE};
+  allocate(&env);
+  GameState *client = make_client(&env);
+  unsigned int seed = 0;
+  init(&env, seed, 0);
+  reset(&env);
 
-        c_step(&env);
-        render(client, &env);
+  while (!WindowShouldClose()) {
+    if (IsKeyDown(KEY_LEFT_SHIFT)) {
+      get_input(&env);
+    } else {
+      forward_linearlstm(net, env.observations, env.actions);
     }
 
-    free_linearlstm(net);
-    free(weights);
-    close_client(client);
-    free_allocated(&env);
-    return 0;
+    c_step(&env);
+    render(client, &env);
+  }
+
+  free_linearlstm(net);
+  free(weights);
+  close_client(client);
+  free_allocated(&env);
+  return 0;
 }
 
 void perftest(float test_time) {
-    Enduro env = {
-        .num_envs = 1,
-        .max_enemies = MAX_ENEMIES,
-        .obs_size = OBSERVATIONS_MAX_SIZE
-    };
-
-    allocate(&env);
+  Enduro env = {.obs_size = OBSERVATIONS_MAX_SIZE};
+  allocate(&env);
 
-    unsigned int seed = 12345;
-    init(&env, seed, 0);
-    reset(&env);
+  unsigned int seed = 12345;
+  init(&env, seed, 0);
+  reset(&env);
 
-    int start = time(NULL);
-    int i = 0;
-    while (time(NULL) - start < test_time) {
-        env.actions[0] = rand()%9;
-        c_step(&env);
-        i++;
-    }
+  int start = time(NULL);
+  int i = 0;
+  while (time(NULL) - start < test_time) {
+    env.actions[0] = rand() % 9;
+    c_step(&env);
+    i++;
+  }
 
-    int end = time(NULL);
-    printf("SPS: %f\n", i / (float)(end - start));
-    free_allocated(&env);
+  int end = time(NULL);
+  printf("SPS: %f\n", i / (float)(end - start));
+  free_allocated(&env);
 }
 
 int main() {
-   demo();
-//    perftest(20.0f);
-   return 0;
+  demo();
+  //   perftest(20.0f);
+  return 0;
 }