From b12ea887dd9240082910153023bfe7825f1e3783 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 13:35:42 +0200 Subject: [PATCH 01/44] Save best trial kwargs for PPO model --- src/optimize.py | 122 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 92 insertions(+), 30 deletions(-) diff --git a/src/optimize.py b/src/optimize.py index 20a5f66..4814509 100644 --- a/src/optimize.py +++ b/src/optimize.py @@ -7,13 +7,14 @@ import time import torch import torch.nn as nn -from fastfiz_env.make import make_wrapped_env, make_wrapped_vec_env +from fastfiz_env.make import make_callable_wrapped_env from fastfiz_env.reward_functions import DefaultReward, WinningReward, RewardFunction from optuna.pruners import MedianPruner from optuna.samplers import TPESampler from stable_baselines3 import PPO from stable_baselines3.common.callbacks import EvalCallback from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.env_util import make_vec_env from typing import Any, Dict @@ -43,16 +44,20 @@ def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]: "max_grad_norm", [0.3, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 5] ) vf_coef = trial.suggest_float("vf_coef", 0, 1) - net_arch_type = trial.suggest_categorical("net_arch", ["tiny", "small", "medium"]) + net_arch_type = trial.suggest_categorical( + "net_arch_type", ["tiny", "small", "medium"] + ) # Uncomment for gSDE (continuous actions) # log_std_init = trial.suggest_float("log_std_init", -4, 1) # Uncomment for gSDE (continuous action) # sde_sample_freq = trial.suggest_categorical("sde_sample_freq", [-1, 8, 16, 32, 64, 128, 256]) # Orthogonal initialization - ortho_init = False + ortho_init = trial.suggest_categorical("ortho_init", [False]) # ortho_init = trial.suggest_categorical('ortho_init', [False, True]) # activation_fn = trial.suggest_categorical('activation_fn', ['tanh', 'relu', 'elu', 'leaky_relu']) - activation_fn_name = trial.suggest_categorical("activation_fn", ["tanh", "relu"]) + activation_fn_name = trial.suggest_categorical( + "activation_fn_name", ["tanh", "relu"] + ) # lr_schedule = "constant" # Uncomment to enable learning rate schedule # lr_schedule = trial.suggest_categorical('lr_schedule', ['linear', 'constant']) @@ -65,6 +70,40 @@ def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]: # Independent networks usually work best # when not working with images + return params_to_kwargs( + batch_size=batch_size, + n_steps=n_steps, + gamma=gamma, + learning_rate=learning_rate, + ent_coef=ent_coef, + clip_range=clip_range, + n_epochs=n_epochs, + gae_lambda=gae_lambda, + max_grad_norm=max_grad_norm, + vf_coef=vf_coef, + net_arch_type=net_arch_type, + ortho_init=ortho_init, + activation_fn_name=activation_fn_name, + ) + + +def params_to_kwargs( + *, + batch_size, + n_steps, + gamma, + learning_rate, + ent_coef, + clip_range, + n_epochs, + gae_lambda, + max_grad_norm, + vf_coef, + net_arch_type, + ortho_init, + activation_fn_name, + **kwargs, +): net_arch = { "tiny": dict(pi=[64], vf=[64]), "small": dict(pi=[64, 64], vf=[64, 64]), @@ -90,12 +129,12 @@ def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]: "max_grad_norm": max_grad_norm, "vf_coef": vf_coef, # "sde_sample_freq": sde_sample_freq, - "policy_kwargs": dict( + "policy_kwargs": { # log_std_init=log_std_init, - net_arch=net_arch, - activation_fn=activation_fn, - ortho_init=ortho_init, - ), + "net_arch": net_arch, + "activation_fn": activation_fn, + "ortho_init": ortho_init, + }, } @@ -145,19 +184,23 @@ def objective( eval_freq: int, 
n_timesteps: int, start_time: str, + no_logs: bool, ) -> float: kwargs = sample_ppo_params(trial) N_ENVS = 4 - env = make_wrapped_vec_env( - env_id, num_balls, max_episode_steps, N_ENVS, reward_function + env = make_vec_env( + make_callable_wrapped_env( + env_id, num_balls, max_episode_steps, reward_function + ), + n_envs=N_ENVS, ) model = PPO( "MlpPolicy", env, **kwargs, - tensorboard_log=f"logs/trials", + tensorboard_log="logs/trials" if not no_logs else None, ) # Create the callback that will periodically evaluate and report the performance. @@ -198,13 +241,21 @@ def objective( def save_trial(trial: optuna.trial.FrozenTrial, path: str) -> None: + os.makedirs(os.path.dirname(path), exist_ok=True) + trial_dict = { + "value": trial.value, + "params": trial.params, + "user_attrs": trial.user_attrs, + "kwargs": params_to_kwargs(**trial.params), + } + + trial_dict["kwargs"]["policy_kwargs"]["activation_fn"] = trial_dict["params"][ + "activation_fn_name" + ] + with open(path, "w") as fp: json.dump( - { - "value": trial.value, - "params": trial.params, - "user_attrs": trial.user_attrs, - }, + trial_dict, fp, indent=4, ) @@ -242,8 +293,17 @@ def save_trial(trial: optuna.trial.FrozenTrial, path: str) -> None: help="Environment ID", required=True, ) - parser.add_argument("--n_jobs", type=int, default=1) - parser.add_argument("--max_episode_steps", type=int, default=20) + parser.add_argument("--n_jobs", type=int, default=1, help="Number of parallel jobs") + parser.add_argument( + "--max_episode_steps", + type=int, + default=20, + help="Max episode steps for the environment", + ) + parser.add_argument( + "--no-logs", action="store_true", help="Disable Tensorboard logging" + ) + args = parser.parse_args() # Set pytorch num threads to 1 for faster training. @@ -260,17 +320,19 @@ def save_trial(trial: optuna.trial.FrozenTrial, path: str) -> None: reward_function = DefaultReward if args.reward == "DefaultReward" else WinningReward - obj_fn = lambda trial: objective( - trial, - args.env_id, - args.num_balls, - args.max_episode_steps, - reward_function, - args.n_eval_episodes, - args.eval_freq, - args.n_timesteps, - start_time, - ) + def obj_fn(trial): + return objective( + trial, + args.env_id, + args.num_balls, + args.max_episode_steps, + reward_function, + args.n_eval_episodes, + args.eval_freq, + args.n_timesteps, + start_time, + args.no_logs, + ) try: study.optimize(obj_fn, n_trials=args.n_trials, timeout=3600, n_jobs=args.n_jobs) From d8b72d710253f44e06569213c7a4e3b88c75372b Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 13:37:40 +0200 Subject: [PATCH 02/44] Remove sb3 dependency from fastfiz-env package --- src/fastfiz_env/make.py | 21 +++++++++++---------- src/train.py | 10 +++++++--- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/fastfiz_env/make.py b/src/fastfiz_env/make.py index 299013f..9b13b4e 100644 --- a/src/fastfiz_env/make.py +++ b/src/fastfiz_env/make.py @@ -3,7 +3,6 @@ from fastfiz_env.wrappers.action import ActionSpaces, FastFizActionWrapper from .reward_functions import RewardFunction, DefaultReward -from stable_baselines3.common.env_util import make_vec_env def make( @@ -13,7 +12,7 @@ def make( num_balls: int = 16, max_episode_steps: int = 100, disable_env_checker: bool = True, - **kwargs + **kwargs, ) -> gym.Env: """ Create an instance of the specified environment. 
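[Editor's usage sketch, not part of the committed diff: patch 02 replaces make_wrapped_vec_env with a make_callable_wrapped_env factory so that stable-baselines3 becomes a caller-side dependency. A minimal sketch of the intended caller-side pattern follows; the environment id "SimpleFastFiz-v0" is a placeholder assumption, while the factory signature and reward import follow the modules shown in this patch.]

    from stable_baselines3.common.env_util import make_vec_env
    from fastfiz_env.make import make_callable_wrapped_env
    from fastfiz_env.reward_functions import DefaultReward

    # Placeholder env id; substitute whichever id the project actually registers.
    env_fn = make_callable_wrapped_env(
        "SimpleFastFiz-v0",
        num_balls=2,
        max_episode_steps=20,
        reward_function=DefaultReward,
    )
    # make_vec_env accepts a callable that builds a single env, so the
    # fastfiz_env package itself no longer needs to import stable-baselines3.
    vec_env = make_vec_env(env_fn, n_envs=4)
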
@@ -42,6 +41,9 @@ def make( def make_wrapped_env( env_id: str, num_balls: int, max_episode_steps: int, reward_function: RewardFunction ): + """ + Create an instance of the specified environment with the FastFizActionWrapper. + """ env = make( env_id, reward_function=reward_function, @@ -53,19 +55,18 @@ def make_wrapped_env( return env -def make_wrapped_vec_env( +def make_callable_wrapped_env( env_id: str, num_balls: int, max_episode_steps: int, - n_envs: int, reward_function: RewardFunction, ): + """ + Create a callable function that returns an instance of the specified environment with the FastFizActionWrapper. + This is useful for creating environments in parallel or with stable-baselines `make_vec_env` function. + """ - def make_env(): + def _init() -> gym.Env: return make_wrapped_env(env_id, num_balls, max_episode_steps, reward_function) - env = make_vec_env( - make_env, - n_envs=n_envs, - ) - return env + return _init diff --git a/src/train.py b/src/train.py index 09de250..12ace36 100644 --- a/src/train.py +++ b/src/train.py @@ -1,7 +1,7 @@ import argparse import glob import os -from fastfiz_env.make import make_wrapped_vec_env +from fastfiz_env.make import make_callable_wrapped_env from fastfiz_env.reward_functions import RewardFunction from typing import Optional from stable_baselines3 import PPO @@ -11,6 +11,7 @@ EvalCallback, CallbackList, ) +from stable_baselines3.common.env_util import make_vec_env def get_latest_run_id(log_path: str, name: str) -> int: @@ -39,8 +40,11 @@ def train( reward_function: RewardFunction = DefaultReward, callbacks=None, ) -> None: - env = make_wrapped_vec_env( - env_id, num_balls, max_episode_steps, n_envs, reward_function + env = make_vec_env( + make_callable_wrapped_env( + env_id, num_balls, max_episode_steps, reward_function + ), + n_envs=n_envs, ) model_name = get_model_name(env_id, num_balls) From 2f1600a3a9225dbe489d210a1b36160a68449f01 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 14:04:24 +0200 Subject: [PATCH 03/44] Add params arg to set hyperparams --- src/train.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/src/train.py b/src/train.py index 12ace36..4254eb6 100644 --- a/src/train.py +++ b/src/train.py @@ -1,5 +1,6 @@ import argparse import glob +import json import os from fastfiz_env.make import make_callable_wrapped_env from fastfiz_env.reward_functions import RewardFunction @@ -12,6 +13,7 @@ CallbackList, ) from stable_baselines3.common.env_util import make_vec_env +from hyperparams import params_to_kwargs def get_latest_run_id(log_path: str, name: str) -> int: @@ -39,6 +41,7 @@ def train( models_path: str = "models/", reward_function: RewardFunction = DefaultReward, callbacks=None, + params: Optional[dict] = None, ) -> None: env = make_vec_env( make_callable_wrapped_env( @@ -47,10 +50,14 @@ def train( n_envs=n_envs, ) + hyperparams = params_to_kwargs(**params) if params else {} + print(hyperparams) model_name = get_model_name(env_id, num_balls) if model_dir is None: - model = PPO("MlpPolicy", env, verbose=1, tensorboard_log=logs_path) + model = PPO( + "MlpPolicy", env, verbose=1, tensorboard_log=logs_path, **hyperparams + ) else: model = PPO.load(model_dir, env=env, verbose=1, tensorboard_log=logs_path) pretrained_name = model_dir.split("/")[-1].rsplit(".zip", 1)[0] @@ -111,6 +118,14 @@ def train( choices=["DefaultReward", "WinningReward"], default="DefaultReward", ) + + # Hyper params + parser.add_argument( + "--params", + type=str, + help="Path to 
hyperparameters file (file must have key 'params' with dict of hyperparameters", + ) + args = parser.parse_args() reward_function = DefaultReward if args.reward == "DefaultReward" else WinningReward @@ -123,6 +138,17 @@ def train( total_timesteps = args.n_time_steps logs_path = args.logs_path models_path = args.models_path + reward = args.reward + params = None + if args.params: + params_path = args.params + assert os.path.exists(logs_path), f"params path does not exist: {logs_path}" + with open(params_path, "r") as fp: + params = json.load(fp) + assert ( + "params" in params + ), "params file must have key 'params' with dict of hyperparameters" + params = params["params"] print( f"Starting training on {env_id} with following settings:\n\ @@ -132,7 +158,7 @@ def train( model_path: {model_path}\n\ logs_path: {logs_path}\n\ models_path: {models_path}\n\ - reward_function: {args.reward}\n" + reward_function: {reward}\n" ) train( @@ -144,4 +170,5 @@ def train( logs_path=logs_path, models_path=models_path, reward_function=reward_function, + params=params, ) From e38ada3b981cf445bacdce50931488442d37deb6 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 14:05:05 +0200 Subject: [PATCH 04/44] Add make_callable_wrapped_env to __all__ --- src/fastfiz_env/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/fastfiz_env/__init__.py b/src/fastfiz_env/__init__.py index 23d92a8..2be325b 100644 --- a/src/fastfiz_env/__init__.py +++ b/src/fastfiz_env/__init__.py @@ -28,14 +28,16 @@ """ -from .make import make, make_wrapped_vec_env, make_wrapped_env +__version__ = "0.0.1" + +from .make import make, make_wrapped_env, make_callable_wrapped_env from .reward_functions import DefaultReward, RewardFunction, CombinedReward from . 
import envs, utils, wrappers, reward_functions __all__ = [ "make", - "make_wrapped_vec_env", "make_wrapped_env", + "make_callable_wrapped_env", "DefaultReward", "RewardFunction", "CombinedReward", From 570867729cea09ce36374e8b59d84c9e6f124c82 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 14:05:26 +0200 Subject: [PATCH 05/44] Add params_to_kwargs() function --- src/hyperparams.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 src/hyperparams.py diff --git a/src/hyperparams.py b/src/hyperparams.py new file mode 100644 index 0000000..6d87690 --- /dev/null +++ b/src/hyperparams.py @@ -0,0 +1,52 @@ +import torch.nn as nn + + +def params_to_kwargs( + *, + batch_size, + n_steps, + gamma, + learning_rate, + ent_coef, + clip_range, + n_epochs, + gae_lambda, + max_grad_norm, + vf_coef, + net_arch_type, + ortho_init, + activation_fn_name, + **kwargs, +): + net_arch = { + "tiny": dict(pi=[64], vf=[64]), + "small": dict(pi=[64, 64], vf=[64, 64]), + "medium": dict(pi=[256, 256], vf=[256, 256]), + }[net_arch_type] + + activation_fn = { + "tanh": nn.Tanh, + "relu": nn.ReLU, + "elu": nn.ELU, + "leaky_relu": nn.LeakyReLU, + }[activation_fn_name] + + return { + "n_steps": n_steps, + "batch_size": batch_size, + "gamma": gamma, + "learning_rate": learning_rate, + "ent_coef": ent_coef, + "clip_range": clip_range, + "n_epochs": n_epochs, + "gae_lambda": gae_lambda, + "max_grad_norm": max_grad_norm, + "vf_coef": vf_coef, + # "sde_sample_freq": sde_sample_freq, + "policy_kwargs": { + # log_std_init=log_std_init, + "net_arch": net_arch, + "activation_fn": activation_fn, + "ortho_init": ortho_init, + }, + } From dd680d2211fe73a933d8e18d72e12c357ab5b5ec Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 14:06:51 +0200 Subject: [PATCH 06/44] Add params_to_kwargs() function --- src/optimize.py | 57 +------------------------------------------------ 1 file changed, 1 insertion(+), 56 deletions(-) diff --git a/src/optimize.py b/src/optimize.py index 4814509..222728b 100644 --- a/src/optimize.py +++ b/src/optimize.py @@ -16,6 +16,7 @@ from stable_baselines3.common.monitor import Monitor from stable_baselines3.common.env_util import make_vec_env from typing import Any, Dict +from hyperparams import params_to_kwargs # https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/rl_zoo3/hyperparams_opt.py @@ -87,57 +88,6 @@ def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]: ) -def params_to_kwargs( - *, - batch_size, - n_steps, - gamma, - learning_rate, - ent_coef, - clip_range, - n_epochs, - gae_lambda, - max_grad_norm, - vf_coef, - net_arch_type, - ortho_init, - activation_fn_name, - **kwargs, -): - net_arch = { - "tiny": dict(pi=[64], vf=[64]), - "small": dict(pi=[64, 64], vf=[64, 64]), - "medium": dict(pi=[256, 256], vf=[256, 256]), - }[net_arch_type] - - activation_fn = { - "tanh": nn.Tanh, - "relu": nn.ReLU, - "elu": nn.ELU, - "leaky_relu": nn.LeakyReLU, - }[activation_fn_name] - - return { - "n_steps": n_steps, - "batch_size": batch_size, - "gamma": gamma, - "learning_rate": learning_rate, - "ent_coef": ent_coef, - "clip_range": clip_range, - "n_epochs": n_epochs, - "gae_lambda": gae_lambda, - "max_grad_norm": max_grad_norm, - "vf_coef": vf_coef, - # "sde_sample_freq": sde_sample_freq, - "policy_kwargs": { - # log_std_init=log_std_init, - "net_arch": net_arch, - "activation_fn": activation_fn, - "ortho_init": ortho_init, - }, - } - - # https://github.com/optuna/optuna-examples/blob/main/rl/sb3_simple.py class 
TrialEvalCallback(EvalCallback): """Callback used for evaluating and reporting a trial.""" @@ -246,13 +196,8 @@ def save_trial(trial: optuna.trial.FrozenTrial, path: str) -> None: "value": trial.value, "params": trial.params, "user_attrs": trial.user_attrs, - "kwargs": params_to_kwargs(**trial.params), } - trial_dict["kwargs"]["policy_kwargs"]["activation_fn"] = trial_dict["params"][ - "activation_fn_name" - ] - with open(path, "w") as fp: json.dump( trial_dict, From c1658b6314930db95eb1f6a9c3690f97f34a5b67 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 14:59:47 +0200 Subject: [PATCH 07/44] Add pyproject.toml --- .github/workflows/lint.yml | 30 ++++++++++++++++++++ .github/workflows/test.yml | 33 ++++++++++++++++++++++ pyproject.toml | 57 ++++++++++++++++++++++++++++++++++++++ setup.py | 28 ++++++++++++------- 4 files changed, 138 insertions(+), 10 deletions(-) create mode 100644 .github/workflows/lint.yml create mode 100644 .github/workflows/test.yml create mode 100644 pyproject.toml diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..0405eb7 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,30 @@ +name: Lint Python package + +on: + push: + pull_request: + branches: ['main'] + +jobs: + lint: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ['3.10', '3.11'] + steps: + - uses: actions/checkout@v4 + - uses: chartboost/ruff-action@v1 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install dependencies + run: | + sudo apt-get install python3-opengl + python -m pip install --upgrade pip + pip install ".[test]" + - name: Run MyPy + run: | + mypy src/fastfiz_env diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..4c60f76 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,33 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Test Python package + +on: + push: + pull_request: + branches: ['main'] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ['3.10', '3.11'] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install dependencies + run: | + sudo apt-get install python3-opengl + python -m pip install --upgrade pip + pip install ".[test]" + - name: Test with pytest + run: | + pytest diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..63e4dba --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,57 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + + +[project] +name = "fastfiz-env" +description = "Gymnasium environments for FastFiz pool simulator." 
+readme = "README.md" +requires-python = ">=3.10" +dynamic = ["version"] +dependencies = [ + "fastfiz @ git+https://github.com/P6-Pool/fastfiz.git@2af8aed22bec1faeb5ac92b98b0751a0023f3fb7", + "gymnasium", + "numpy", + "vectormath", +] + +[project.optional-dependencies] +dev = [ + "fastfiz_renderer @ git+https://github.com/P6-Pool/fastfiz-renderer.git@4ffb95e8683b30975b1d8f5dd483d56599e1e062", + "stable-baselines3", + "tqdm", + "rich", + "torch", + "tensorboard", + "optuna", +] +test = ["pytest", "mypy", "ruff"] +all = [ + # dev + "fastfiz_renderer @ git+https://github.com/P6-Pool/fastfiz-renderer.git@4ffb95e8683b30975b1d8f5dd483d56599e1e062", + "stable-baselines3", + "tqdm", + "rich", + "torch", + "tensorboard", + "optuna", + # test + "pytest", + "mypy", + "ruff", +] + +[tool.pytest.ini_options] +filterwarnings = [ + "ignore::DeprecationWarning:tensorboard", + "ignore::UserWarning:gym", +] + +[tool.mypy] +ignore_missing_imports = true +follow_imports = "silent" +show_error_codes = true + +[tool.ruff] +line-length = 127 diff --git a/setup.py b/setup.py index 31ad11f..1d8cae7 100644 --- a/setup.py +++ b/setup.py @@ -1,15 +1,23 @@ +import re from setuptools import setup, find_packages -with open("requirements.txt") as f: - requirements = f.read().splitlines() +# with open("requirements.txt") as f: +# requirements = f.read().splitlines() + + +def get_version(): + with open("src/fastfiz_env/__init__.py", "r") as f: + for line in f: + match = re.match(r"__version__\s*=\s*['\"]([^'\"]+)['\"]", line) + if match: + return match.group(1) + raise RuntimeError("Version not found in __init__.py") + setup( - name="fastfiz-env", - description="Gymnasium environment for FastFiz pool simulator", - version="0.0.1", - license="MIT", - install_requires=requirements, - test_requires=["pytest"], - packages=find_packages(where="src"), - package_dir={"": "src"}, + version=get_version(), + # install_requires=requirements, + # test_requires=["pytest"], + # packages=find_packages(where="src"), + # package_dir={"": "src"}, ) From fe87f5af50ea3ee0f654478b2e7cbfcf40ffebf4 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 15:10:08 +0200 Subject: [PATCH 08/44] Add options arg to envs --- src/fastfiz_env/envs/pockets_fastfiz.py | 10 +++++++--- src/fastfiz_env/envs/simple_fastfiz.py | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/fastfiz_env/envs/pockets_fastfiz.py b/src/fastfiz_env/envs/pockets_fastfiz.py index de94bb7..e635cbf 100644 --- a/src/fastfiz_env/envs/pockets_fastfiz.py +++ b/src/fastfiz_env/envs/pockets_fastfiz.py @@ -28,6 +28,7 @@ def __init__( *, reward_function: RewardFunction = DefaultReward, num_balls: int = 16, + options: Optional[dict] = None, ) -> None: super().__init__() self.num_balls = num_balls @@ -37,6 +38,8 @@ def __init__( self.reward = reward_function self.max_episode_steps = None self.elapsed_steps = None + self.options = options or {} + self._quick_terminate = self.options.get("quick_terminate", False) def _get_time_limit_attrs(self): try: @@ -148,10 +151,11 @@ def _possible_shot(self, shot_params: ff.ShotParams) -> bool: return possible_shot(self.table_state, shot_params) def _is_terminal_state(self) -> bool: - pocketed = num_balls_pocketed(self.table_state) + if self._quick_terminate: + pocketed = num_balls_pocketed(self.table_state) - if pocketed <= self._prev_pocketed: - return True + if pocketed <= self._prev_pocketed: + return True return terminal_state(self.table_state) diff --git a/src/fastfiz_env/envs/simple_fastfiz.py 
b/src/fastfiz_env/envs/simple_fastfiz.py index a9f8875..b316356 100644 --- a/src/fastfiz_env/envs/simple_fastfiz.py +++ b/src/fastfiz_env/envs/simple_fastfiz.py @@ -26,6 +26,7 @@ def __init__( *, reward_function: RewardFunction = DefaultReward, num_balls: int = 16, + options: Optional[dict] = None, ) -> None: super().__init__() self.num_balls = num_balls @@ -35,6 +36,8 @@ def __init__( self.reward = reward_function self.max_episode_steps = None self.elapsed_steps = None + self.options = options or {} + self._quick_terminate = self.options.get("quick_terminate", False) def _get_time_limit_attrs(self): try: @@ -131,10 +134,11 @@ def _possible_shot(self, shot_params: ff.ShotParams) -> bool: return possible_shot(self.table_state, shot_params) def _is_terminal_state(self) -> bool: - pocketed = num_balls_pocketed(self.table_state) + if self._quick_terminate: + pocketed = num_balls_pocketed(self.table_state) - if pocketed <= self._prev_pocketed: - return True + if pocketed <= self._prev_pocketed: + return True return terminal_state(self.table_state) From e537e545cc7bf7aa1b6fe77acbf144ed765aebdf Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 15:32:10 +0200 Subject: [PATCH 09/44] Add env options arg --- src/fastfiz_env/make.py | 13 +++++++++++-- src/optimize.py | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/src/fastfiz_env/make.py b/src/fastfiz_env/make.py index 9b13b4e..14d315b 100644 --- a/src/fastfiz_env/make.py +++ b/src/fastfiz_env/make.py @@ -1,3 +1,4 @@ +from typing import Optional from gymnasium.envs.registration import EnvSpec import gymnasium as gym @@ -39,7 +40,11 @@ def make( def make_wrapped_env( - env_id: str, num_balls: int, max_episode_steps: int, reward_function: RewardFunction + env_id: str, + num_balls: int, + max_episode_steps: int, + reward_function: RewardFunction, + **kwargs, ): """ Create an instance of the specified environment with the FastFizActionWrapper. @@ -50,6 +55,7 @@ def make_wrapped_env( num_balls=num_balls, max_episode_steps=max_episode_steps, disable_env_checker=False, + **kwargs, ) env = FastFizActionWrapper(env, action_space_id=ActionSpaces.NO_OFFSET_3D) return env @@ -60,6 +66,7 @@ def make_callable_wrapped_env( num_balls: int, max_episode_steps: int, reward_function: RewardFunction, + **kwargs, ): """ Create a callable function that returns an instance of the specified environment with the FastFizActionWrapper. @@ -67,6 +74,8 @@ def make_callable_wrapped_env( """ def _init() -> gym.Env: - return make_wrapped_env(env_id, num_balls, max_episode_steps, reward_function) + return make_wrapped_env( + env_id, num_balls, max_episode_steps, reward_function, **kwargs + ) return _init diff --git a/src/optimize.py b/src/optimize.py index 222728b..0de08dc 100644 --- a/src/optimize.py +++ b/src/optimize.py @@ -135,13 +135,14 @@ def objective( n_timesteps: int, start_time: str, no_logs: bool, + env_kwargs: dict, ) -> float: kwargs = sample_ppo_params(trial) N_ENVS = 4 env = make_vec_env( make_callable_wrapped_env( - env_id, num_balls, max_episode_steps, reward_function + env_id, num_balls, max_episode_steps, reward_function, **env_kwargs ), n_envs=N_ENVS, ) @@ -206,6 +207,28 @@ def save_trial(trial: optuna.trial.FrozenTrial, path: str) -> None: ) +class StoreDict(argparse.Action): + """ + Custom argparse action for storing dict. 
+ + In: args1:0.0 args2:"dict(a=1)" + Out: {'args1': 0.0, arg2: dict(a=1)} + """ + + def __init__(self, option_strings, dest, nargs=None, **kwargs): + self._nargs = nargs + super().__init__(option_strings, dest, nargs=nargs, **kwargs) + + def __call__(self, parser, namespace, values, option_string=None): + arg_dict = {} + for arguments in values: # type: ignore + key = arguments.split(":")[0] + value = ":".join(arguments.split(":")[1:]) + # Evaluate the string as python code + arg_dict[key] = eval(value) + setattr(namespace, self.dest, arg_dict) + + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Description of your program") parser.add_argument("--n_trials", type=int, default=20, help="Number of trials") @@ -249,6 +272,15 @@ def save_trial(trial: optuna.trial.FrozenTrial, path: str) -> None: "--no-logs", action="store_true", help="Disable Tensorboard logging" ) + parser.add_argument( + "--env-options", + type=str, + nargs="+", + action=StoreDict, + help="Optional keyword argument to pass to the env constructor", + default={}, + ) + args = parser.parse_args() # Set pytorch num threads to 1 for faster training. @@ -265,6 +297,8 @@ def save_trial(trial: optuna.trial.FrozenTrial, path: str) -> None: reward_function = DefaultReward if args.reward == "DefaultReward" else WinningReward + env_kwargs = {"options": args.env_options} + def obj_fn(trial): return objective( trial, @@ -277,6 +311,7 @@ def obj_fn(trial): args.n_timesteps, start_time, args.no_logs, + env_kwargs, ) try: From bb9a25c93fb0bec0606d63c909d8ce6c8f5a7bca Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 18:47:16 +0200 Subject: [PATCH 10/44] Fix assert value --- src/tests/envs/test_envs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/envs/test_envs.py b/src/tests/envs/test_envs.py index df6bb51..5ae81f7 100644 --- a/src/tests/envs/test_envs.py +++ b/src/tests/envs/test_envs.py @@ -29,7 +29,7 @@ def test_step(self): obs, reward, done, truncated, info = env.step(action) self.assertEqual(obs.shape, (16, 2)) self.assertEqual(reward, 1) - self.assertEqual(done, True) # Will terminate as no balls were pocketed + self.assertEqual(done, False) self.assertEqual(truncated, False) self.assertEqual(info, {"is_success": False}) From 368e7a0a49b48eebb0b4f7a338820409a7814396 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 18:47:39 +0200 Subject: [PATCH 11/44] Remove p5 import --- src/fastfiz_env/utils/fastfiz/renderer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fastfiz_env/utils/fastfiz/renderer.py b/src/fastfiz_env/utils/fastfiz/renderer.py index cc2bcc1..43c084f 100644 --- a/src/fastfiz_env/utils/fastfiz/renderer.py +++ b/src/fastfiz_env/utils/fastfiz/renderer.py @@ -1,4 +1,3 @@ -from p5 import * import fastfiz as ff import vectormath as vmath From fb98c9628e766a884a9e062c8e844a69217b80cc Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 18:48:20 +0200 Subject: [PATCH 12/44] Add swig and gsl to dependencies --- .github/workflows/test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4c60f76..7287df4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -25,7 +25,8 @@ jobs: cache: 'pip' - name: Install dependencies run: | - sudo apt-get install python3-opengl + sudo apt update + sudo apt install python3-opengl swig libgsl-dev python -m pip install --upgrade pip pip install ".[test]" - name: Test with pytest From 
bcf7a07379d625312d16c04e1e0e273a041ff4c5 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 18:48:39 +0200 Subject: [PATCH 13/44] Format with Ruff --- src/eval.py | 12 +-- src/fastfiz_env/envs/frames_fastfiz.py | 24 ++--- src/fastfiz_env/envs/pockets_fastfiz.py | 8 +- src/fastfiz_env/envs/simple_fastfiz.py | 4 +- src/fastfiz_env/envs/testing_fastfiz.py | 21 +---- src/fastfiz_env/envs/utils.py | 4 +- src/fastfiz_env/make.py | 4 +- .../reward_functions/combined_reward.py | 10 +- .../delta_best_total_distance_reward.py | 8 +- .../common/impossible_shot_reward.py | 5 +- .../common/total_distance_reward.py | 4 +- .../reward_functions/common/weights.py | 36 ++----- .../reward_functions/reward_function.py | 8 +- src/fastfiz_env/utils/fastfiz/fastfiz.py | 69 ++++---------- src/fastfiz_env/utils/fastfiz/renderer.py | 40 ++------ src/fastfiz_env/wrappers/action.py | 25 ++--- .../wrappers/time_limit_injection.py | 4 +- src/optimize.py | 52 +++-------- src/tests/utils/test_features.py | 1 - src/tests/utils/test_reward_functions.py | 93 +++++-------------- src/train.py | 12 +-- 21 files changed, 109 insertions(+), 335 deletions(-) diff --git a/src/eval.py b/src/eval.py index f481cf2..1ace5d3 100644 --- a/src/eval.py +++ b/src/eval.py @@ -85,13 +85,9 @@ def decide_shot(self, table_state: ff.TableState) -> Optional[ff.ShotParams]: for _ in range(10): if isinstance(self.env, FramesFastFiz): if self.prev_ts is None: - obs = self.env.compute_observation( - table_state, table_state, self.shot - ) + obs = self.env.compute_observation(table_state, table_state, self.shot) else: - obs = self.env.compute_observation( - self.prev_ts, table_state, self.shot - ) + obs = self.env.compute_observation(self.prev_ts, table_state, self.shot) elif isinstance(self.env, PocketsFastFiz): obs = self.env.compute_observation(table_state) else: @@ -114,9 +110,7 @@ def main() -> None: parser.add_argument("-m", "--model", type=str, help="Path to the model file") args = parser.parse_args() - assert args.model is not None and os.path.exists( - args.model - ), f"Model file not found: {args.model}" + assert args.model is not None and os.path.exists(args.model), f"Model file not found: {args.model}" model = PPO.load(args.model) diff --git a/src/fastfiz_env/envs/frames_fastfiz.py b/src/fastfiz_env/envs/frames_fastfiz.py index eb586ce..d18f464 100644 --- a/src/fastfiz_env/envs/frames_fastfiz.py +++ b/src/fastfiz_env/envs/frames_fastfiz.py @@ -28,9 +28,7 @@ class FramesFastFiz(gym.Env): TOTAL_BALLS = 16 # Including the cue ball num_balls = 2 - def __init__( - self, reward_function: RewardFunction = DefaultReward, num_balls: int = 16 - ) -> None: + def __init__(self, reward_function: RewardFunction = DefaultReward, num_balls: int = 16) -> None: super().__init__() if num_balls < 2: warnings.warn( @@ -47,15 +45,11 @@ def __init__( def _max_episode_steps(self): if self.get_wrapper_attr("_time_limit_max_episode_steps") is not None: - self.max_episode_steps = self.get_wrapper_attr( - "_time_limit_max_episode_steps" - ) + self.max_episode_steps = self.get_wrapper_attr("_time_limit_max_episode_steps") print(f"Setting max episode steps to {self.max_episode_steps}") self.reward.max_episode_steps = self.max_episode_steps - def reset( - self, *, seed: Optional[int] = None, options: Optional[dict] = None - ) -> tuple[np.ndarray, dict]: + def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple[np.ndarray, dict]: """ Reset the environment to its initial state. 
""" @@ -187,14 +181,9 @@ def _possible_shot(self, shot_params: ff.ShotParams) -> bool: """ Check if the shot is possible. """ - return ( - self.table_state.isPhysicallyPossible(shot_params) - == ff.TableState.OK_PRECONDITION - ) + return self.table_state.isPhysicallyPossible(shot_params) == ff.TableState.OK_PRECONDITION - def _compute_observation( - self, prev_table_state: ff.TableState, shot: Optional[ff.Shot] - ) -> np.ndarray: + def _compute_observation(self, prev_table_state: ff.TableState, shot: Optional[ff.Shot]) -> np.ndarray: return self.compute_observation(prev_table_state, self.table_state, shot) @classmethod @@ -236,8 +225,7 @@ def compute_observation( pocketed = is_pocketed_state(gb.state) frames_seq[frame][gb.number] = [ *normalize_ball_positions((gb.position.x, gb.position.y)), # type: ignore - normalize_ball_velocity(np.hypot(gb.velocity.x, gb.velocity.y)) * 2 - - 1, + normalize_ball_velocity(np.hypot(gb.velocity.x, gb.velocity.y)) * 2 - 1, pocketed, ] return frames_seq diff --git a/src/fastfiz_env/envs/pockets_fastfiz.py b/src/fastfiz_env/envs/pockets_fastfiz.py index e635cbf..ae7c9bf 100644 --- a/src/fastfiz_env/envs/pockets_fastfiz.py +++ b/src/fastfiz_env/envs/pockets_fastfiz.py @@ -54,9 +54,7 @@ def _get_time_limit_attrs(self): self.reward.max_episode_steps = self.max_episode_steps - def reset( - self, *, seed: Optional[int] = None, options: Optional[dict] = None - ) -> tuple[np.ndarray, dict]: + def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple[np.ndarray, dict]: super().reset(seed=seed) if self.max_episode_steps is None or self.elapsed_steps is None: @@ -66,9 +64,7 @@ def reset( self.reward.reset(self.table_state) self._prev_pocketed = 0 - self._pocket_centers = normalize_ball_positions( - pocket_centers(self.table_state) - ) + self._pocket_centers = normalize_ball_positions(pocket_centers(self.table_state)) observation = self._get_observation() info = self._get_info() diff --git a/src/fastfiz_env/envs/simple_fastfiz.py b/src/fastfiz_env/envs/simple_fastfiz.py index b316356..f1105d2 100644 --- a/src/fastfiz_env/envs/simple_fastfiz.py +++ b/src/fastfiz_env/envs/simple_fastfiz.py @@ -52,9 +52,7 @@ def _get_time_limit_attrs(self): self.reward.max_episode_steps = self.max_episode_steps - def reset( - self, *, seed: Optional[int] = None, options: Optional[dict] = None - ) -> tuple[np.ndarray, dict]: + def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple[np.ndarray, dict]: super().reset(seed=seed) if self.max_episode_steps is None or self.elapsed_steps is None: diff --git a/src/fastfiz_env/envs/testing_fastfiz.py b/src/fastfiz_env/envs/testing_fastfiz.py index b47dddd..2265180 100644 --- a/src/fastfiz_env/envs/testing_fastfiz.py +++ b/src/fastfiz_env/envs/testing_fastfiz.py @@ -68,14 +68,10 @@ def __init__( def _max_episode_steps(self): if self.get_wrapper_attr("_time_limit_max_episode_steps") is not None: - self.max_episode_steps = self.get_wrapper_attr( - "_time_limit_max_episode_steps" - ) + self.max_episode_steps = self.get_wrapper_attr("_time_limit_max_episode_steps") self.reward.max_episode_steps = self.max_episode_steps - def reset( - self, *, seed: Optional[int] = None, options: Optional[dict] = None - ) -> tuple[np.ndarray, dict]: + def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple[np.ndarray, dict]: super().reset(seed=seed) if self.max_episode_steps is None: @@ -96,9 +92,7 @@ def reset( observation = self._get_observation() info = self._get_info() - 
self.logger.info( - "Reset(%s) - initial observation:\n%s", self.n_episodes, observation - ) + self.logger.info("Reset(%s) - initial observation:\n%s", self.n_episodes, observation) self.logger.info("Reset(%s) - initial info: %s", self.n_episodes, info) self.n_episodes += 1 @@ -156,9 +150,7 @@ def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, bool, dict] def _get_observation(self): ball_positions = get_ball_positions(self.table_state)[: self.TOTAL_BALLS] # ball_positions = normalize_ball_positions(ball_positions) # Normalize to [0, 1] - ball_positions = ( - normalize_ball_positions(ball_positions) * 2 - 1 - ) # Normalize to [-1, 1] (symmetric) + ball_positions = normalize_ball_positions(ball_positions) * 2 - 1 # Normalize to [-1, 1] (symmetric) observation = np.zeros((self.TOTAL_BALLS, 2), dtype=np.float32) for i, ball_pos in enumerate(ball_positions): observation[i] = [*ball_pos] @@ -224,7 +216,4 @@ def _possible_shot(self, shot_params: ff.ShotParams) -> bool: """ Check if the shot is possible. """ - return ( - self.table_state.isPhysicallyPossible(shot_params) - == ff.TableState.OK_PRECONDITION - ) + return self.table_state.isPhysicallyPossible(shot_params) == ff.TableState.OK_PRECONDITION diff --git a/src/fastfiz_env/envs/utils.py b/src/fastfiz_env/envs/utils.py index ffc27e1..2e1ccb4 100644 --- a/src/fastfiz_env/envs/utils.py +++ b/src/fastfiz_env/envs/utils.py @@ -26,6 +26,4 @@ def possible_shot(table_state: ff.TableState, shot_params: ff.ShotParams) -> boo """ Check if the shot is possible. """ - return ( - table_state.isPhysicallyPossible(shot_params) == ff.TableState.OK_PRECONDITION - ) + return table_state.isPhysicallyPossible(shot_params) == ff.TableState.OK_PRECONDITION diff --git a/src/fastfiz_env/make.py b/src/fastfiz_env/make.py index 14d315b..0b46333 100644 --- a/src/fastfiz_env/make.py +++ b/src/fastfiz_env/make.py @@ -74,8 +74,6 @@ def make_callable_wrapped_env( """ def _init() -> gym.Env: - return make_wrapped_env( - env_id, num_balls, max_episode_steps, reward_function, **kwargs - ) + return make_wrapped_env(env_id, num_balls, max_episode_steps, reward_function, **kwargs) return _init diff --git a/src/fastfiz_env/reward_functions/combined_reward.py b/src/fastfiz_env/reward_functions/combined_reward.py index f86c854..eb4bb4b 100644 --- a/src/fastfiz_env/reward_functions/combined_reward.py +++ b/src/fastfiz_env/reward_functions/combined_reward.py @@ -75,14 +75,12 @@ def reward( total_reward += reward if issubclass(reward_function.__class__, BinaryReward): - if ( - reward == 1 * reward_function.weight() - and self.short_circuit - and reward_function.short_circuit - ): + if reward == 1 * reward_function.weight() and self.short_circuit and reward_function.short_circuit: return total_reward return total_reward def __str__(self) -> str: - return f"CombinedReward({[str(reward) for reward in self.reward_functions]}, {None}, short_circuit={self.short_circuit})" + return ( + f"CombinedReward({[str(reward) for reward in self.reward_functions]}, {None}, short_circuit={self.short_circuit})" + ) diff --git a/src/fastfiz_env/reward_functions/common/delta_best_total_distance_reward.py b/src/fastfiz_env/reward_functions/common/delta_best_total_distance_reward.py index 97cba01..eead78a 100644 --- a/src/fastfiz_env/reward_functions/common/delta_best_total_distance_reward.py +++ b/src/fastfiz_env/reward_functions/common/delta_best_total_distance_reward.py @@ -19,9 +19,7 @@ def reset(self, table_state) -> None: self.pockets = pocket_centers(table_state) # num_balls = 
num_balls_in_play(table_state) ball_positions = get_ball_positions(table_state)[1 : self.num_balls] - self.min_total_dist = np.sum( - distances_to_closest_pocket(ball_positions, self.pockets) - ) + self.min_total_dist = np.sum(distances_to_closest_pocket(ball_positions, self.pockets)) def reward( self, @@ -34,9 +32,7 @@ def reward( """ num_balls = num_balls_in_play(table_state) ball_positions = get_ball_positions(table_state)[1:num_balls] - new_total_dist = np.sum( - distances_to_closest_pocket(ball_positions, self.pockets) - ) + new_total_dist = np.sum(distances_to_closest_pocket(ball_positions, self.pockets)) reward = float(self.min_total_dist - new_total_dist) diff --git a/src/fastfiz_env/reward_functions/common/impossible_shot_reward.py b/src/fastfiz_env/reward_functions/common/impossible_shot_reward.py index 31878d2..d744eb5 100644 --- a/src/fastfiz_env/reward_functions/common/impossible_shot_reward.py +++ b/src/fastfiz_env/reward_functions/common/impossible_shot_reward.py @@ -18,8 +18,5 @@ def reward( Reward function returns 1 if the shot is impossible, 0 otherwise. """ shot_params = ff.ShotParams(*action) - impossible_shot = ( - table_state.isPhysicallyPossible(shot_params) - != ff.TableState.OK_PRECONDITION - ) + impossible_shot = table_state.isPhysicallyPossible(shot_params) != ff.TableState.OK_PRECONDITION return 1 if impossible_shot else 0 diff --git a/src/fastfiz_env/reward_functions/common/total_distance_reward.py b/src/fastfiz_env/reward_functions/common/total_distance_reward.py index 9d2aac2..30e05ac 100644 --- a/src/fastfiz_env/reward_functions/common/total_distance_reward.py +++ b/src/fastfiz_env/reward_functions/common/total_distance_reward.py @@ -29,7 +29,5 @@ def reward( """ num_balls = num_balls_in_play(table_state) ball_positions = get_ball_positions(table_state)[1:num_balls] - total_distance = np.sum( - distances_to_closest_pocket(ball_positions, self.pockets) - ) + total_distance = np.sum(distances_to_closest_pocket(ball_positions, self.pockets)) return total_distance diff --git a/src/fastfiz_env/reward_functions/common/weights.py b/src/fastfiz_env/reward_functions/common/weights.py index 586fbc0..6b0622d 100644 --- a/src/fastfiz_env/reward_functions/common/weights.py +++ b/src/fastfiz_env/reward_functions/common/weights.py @@ -5,53 +5,35 @@ NegativeConstantWeight = -ConstantWeight -def ConstantWeightMaxSteps( - num_balls: int, current_step: int, max_steps: int | None -) -> float: +def ConstantWeightMaxSteps(num_balls: int, current_step: int, max_steps: int | None) -> float: assert max_steps is not None, "Max steps must be defined for ConstantWeightMaxSteps" return ConstantWeight / max_steps -def NegativeConstantWeightMaxSteps( - num_balls: int, current_step: int, max_steps: int | None -) -> float: - assert ( - max_steps is not None - ), "Max steps must be defined for NegativeConstantWeightMaxSteps" +def NegativeConstantWeightMaxSteps(num_balls: int, current_step: int, max_steps: int | None) -> float: + assert max_steps is not None, "Max steps must be defined for NegativeConstantWeightMaxSteps" return NegativeConstantWeight / max_steps -def ConstantWeightNumBalls( - num_balls: int, current_step: int, max_steps: int | None -) -> float: +def ConstantWeightNumBalls(num_balls: int, current_step: int, max_steps: int | None) -> float: return ConstantWeight / num_balls -def NegativeConstantWeightNumBalls( - num_balls: int, current_step: int, max_steps: int | None -) -> float: +def NegativeConstantWeightNumBalls(num_balls: int, current_step: int, max_steps: int | None) -> 
float: return NegativeConstantWeight / num_balls -def ConstantWeightBalls( - num_balls: int, current_step: int, max_steps: int | None -) -> float: +def ConstantWeightBalls(num_balls: int, current_step: int, max_steps: int | None) -> float: return ConstantWeight / (num_balls - 1) -def NegativeConstantWeightBalls( - num_balls: int, current_step: int, max_steps: int | None -) -> float: +def NegativeConstantWeightBalls(num_balls: int, current_step: int, max_steps: int | None) -> float: return NegativeConstantWeight / (num_balls - 1) -def ConstantWeightCurrentStep( - num_balls: int, current_step: int, max_steps: int | None -) -> float: +def ConstantWeightCurrentStep(num_balls: int, current_step: int, max_steps: int | None) -> float: return ConstantWeight / current_step -def NegativeConstantWeightCurrentStep( - num_balls: int, current_step: int, max_steps: int | None -) -> float: +def NegativeConstantWeightCurrentStep(num_balls: int, current_step: int, max_steps: int | None) -> float: return NegativeConstantWeight / current_step diff --git a/src/fastfiz_env/reward_functions/reward_function.py b/src/fastfiz_env/reward_functions/reward_function.py index f55e5cf..32545dd 100644 --- a/src/fastfiz_env/reward_functions/reward_function.py +++ b/src/fastfiz_env/reward_functions/reward_function.py @@ -81,9 +81,7 @@ def get_reward( float: The calculated reward value. """ if not self.__reset_called: - raise RuntimeError( - f"{self.__class__.__name__} reset() method must be called before calling get_reward()." - ) + raise RuntimeError(f"{self.__class__.__name__} reset() method must be called before calling get_reward().") self.current_step += 1 return self.reward(prev_table_state, table_state, action) * self.weight() @@ -115,9 +113,7 @@ def weight( float: The weight of the reward function. """ if callable(self.__weight): - return self.__weight( - self.num_balls, self.current_step, self.max_episode_steps - ) + return self.__weight(self.num_balls, self.current_step, self.max_episode_steps) return self.__weight def __str__(self): diff --git a/src/fastfiz_env/utils/fastfiz/fastfiz.py b/src/fastfiz_env/utils/fastfiz/fastfiz.py index 8a5f4ad..d4c936c 100644 --- a/src/fastfiz_env/utils/fastfiz/fastfiz.py +++ b/src/fastfiz_env/utils/fastfiz/fastfiz.py @@ -44,13 +44,7 @@ def num_balls_in_play(table_state: ff.TableState) -> int: Returns: int: The number of balls in play. """ - return len( - [ - i - for i in range(table_state.getNumBalls()) - if table_state.getBall(i).isInPlay() - ] - ) + return len([i for i in range(table_state.getNumBalls()) if table_state.getBall(i).isInPlay()]) def num_balls_pocketed( @@ -71,14 +65,10 @@ def num_balls_pocketed( int: The number of balls pocketed. """ stop = table_state.getNumBalls() if range_stop is None else range_stop - return len( - [i for i in range(range_start, stop) if table_state.getBall(i).isPocketed()] - ) + return len([i for i in range(range_start, stop) if table_state.getBall(i).isPocketed()]) -def any_ball_has_moved( - prev_ball_positions: np.ndarray, ball_positions: np.ndarray -) -> bool: +def any_ball_has_moved(prev_ball_positions: np.ndarray, ball_positions: np.ndarray) -> bool: """ Check if any ball has moved by comparing the previous ball positions with the current ball positions. 
@@ -176,9 +166,7 @@ def distance_to_closest_pocket(ball_position: np.ndarray, pockets: np.ndarray) - return np.min(distance_to_pockets(ball_position, pockets)) -def distances_to_closest_pocket( - ball_positions: np.ndarray, pockets: np.ndarray -) -> np.ndarray: +def distances_to_closest_pocket(ball_positions: np.ndarray, pockets: np.ndarray) -> np.ndarray: """ Calculates the distances from each ball position to the closest pocket. @@ -189,12 +177,7 @@ def distances_to_closest_pocket( Returns: np.ndarray: An array of distances from each ball position to the closest pocket. """ - return np.array( - [ - distance_to_closest_pocket(ball_position, pockets) - for ball_position in ball_positions - ] - ) + return np.array([distance_to_closest_pocket(ball_position, pockets) for ball_position in ball_positions]) def create_table_state(n_balls: int) -> ff.TableState: @@ -223,9 +206,7 @@ def create_table_state(n_balls: int) -> ff.TableState: return table_state -def create_random_table_state( - n_balls: int, seed: Optional[int] = None -) -> ff.TableState: +def create_random_table_state(n_balls: int, seed: Optional[int] = None) -> ff.TableState: """ Creates a random table state with the specified number of balls. @@ -241,9 +222,7 @@ def create_random_table_state( return table_state -def randomize_table_state( - table_state: ff.TableState, seed: Optional[int] = None -) -> None: +def randomize_table_state(table_state: ff.TableState, seed: Optional[int] = None) -> None: """ Randomizes the positions of the balls on the pool table within the given table state. @@ -297,9 +276,7 @@ def randomize_table_state( return table_state -def map_action_to_shot_params( - table_state: ff.TableState, action: np.ndarray -) -> np.ndarray: +def map_action_to_shot_params(table_state: ff.TableState, action: np.ndarray) -> np.ndarray: """ Maps the given action values to the corresponding shot parameters within the specified ranges. @@ -313,17 +290,13 @@ def map_action_to_shot_params( """ a = np.interp(action[0], [0, 0], [0, 0]) b = np.interp(action[1], [0, 0], [0, 0]) - theta = np.interp( - action[2], [-1, 1], [table_state.MIN_THETA, table_state.MAX_THETA - 0.001] - ) + theta = np.interp(action[2], [-1, 1], [table_state.MIN_THETA, table_state.MAX_THETA - 0.001]) phi = np.interp(action[3], [-1, 1], [0, 360]) v = np.interp(action[4], [-1, 1], [0, table_state.MAX_VELOCITY - 0.001]) return np.array([0, 0, theta, phi, v], dtype=np.float64) -def shot_params_from_action( - table_state: ff.TableState, action: np.ndarray -) -> ff.ShotParams: +def shot_params_from_action(table_state: ff.TableState, action: np.ndarray) -> ff.ShotParams: """ Converts an action into shot parameters. 
@@ -349,21 +322,11 @@ def action_to_shot(action: np.ndarray, action_space: spaces.Box) -> ff.ShotParam MAX_THETA = ff.TableState.MAX_THETA MAX_VELOCITY = ff.TableState.MAX_VELOCITY - a = np.interp( - action[0], [action_space.low[0], action_space.high[0]], [MIN_OFFSET, MAX_OFFSET] - ) - b = np.interp( - action[1], [action_space.low[1], action_space.high[1]], [MIN_OFFSET, MAX_OFFSET] - ) - theta = np.interp( - action[2], [action_space.low[2], action_space.high[2]], [MIN_THETA, MAX_THETA] - ) - phi = np.interp( - action[3], [action_space.low[3], action_space.high[3]], [MIN_PHI, MAX_PHI] - ) - velocity = np.interp( - action[4], [action_space.low[4], action_space.high[4]], [0, MAX_VELOCITY] - ) + a = np.interp(action[0], [action_space.low[0], action_space.high[0]], [MIN_OFFSET, MAX_OFFSET]) + b = np.interp(action[1], [action_space.low[1], action_space.high[1]], [MIN_OFFSET, MAX_OFFSET]) + theta = np.interp(action[2], [action_space.low[2], action_space.high[2]], [MIN_THETA, MAX_THETA]) + phi = np.interp(action[3], [action_space.low[3], action_space.high[3]], [MIN_PHI, MAX_PHI]) + velocity = np.interp(action[4], [action_space.low[4], action_space.high[4]], [0, MAX_VELOCITY]) # print(f"a: {a}, b: {b}, theta: {theta}, phi: {phi}, velocity: {velocity}") @@ -371,7 +334,7 @@ def action_to_shot(action: np.ndarray, action_space: spaces.Box) -> ff.ShotParam def normalize_ball_positions( - ball_positions: np.ndarray[float, np.dtype[np.float32]] + ball_positions: np.ndarray[float, np.dtype[np.float32]], ) -> np.ndarray[float, np.dtype[np.float32]]: """ Normalize the ball positions to be within the range [0, 1]. diff --git a/src/fastfiz_env/utils/fastfiz/renderer.py b/src/fastfiz_env/utils/fastfiz/renderer.py index 43c084f..1a44b29 100644 --- a/src/fastfiz_env/utils/fastfiz/renderer.py +++ b/src/fastfiz_env/utils/fastfiz/renderer.py @@ -41,43 +41,25 @@ def update( time_since_event_start = time_since_shot_start - cur_state.e_time def calc_sliding_displacement(delta_time: float) -> vmath.Vector2: - rotational_velocity: vmath.Vector3 = GameBall.RADIUS * vmath.Vector3( - 0, 0, cur_state.ang_vel.z - ).cross(cur_state.ang_vel) - relative_velocity = cur_state.vel + vmath.Vector2( - rotational_velocity.x, rotational_velocity.y + rotational_velocity: vmath.Vector3 = GameBall.RADIUS * vmath.Vector3(0, 0, cur_state.ang_vel.z).cross( + cur_state.ang_vel ) + relative_velocity = cur_state.vel + vmath.Vector2(rotational_velocity.x, rotational_velocity.y) self.velocity = ( - cur_state.vel - - delta_time - * gravitational_const - * sliding_friction_const - * relative_velocity.normalize() + cur_state.vel - delta_time * gravitational_const * sliding_friction_const * relative_velocity.normalize() ) return ( cur_state.vel * delta_time - - 0.5 - * sliding_friction_const - * gravitational_const - * delta_time**2 - * relative_velocity.normalize() + - 0.5 * sliding_friction_const * gravitational_const * delta_time**2 * relative_velocity.normalize() ) def calc_rolling_displacement(delta_time: float) -> vmath.Vector2: self.velocity = ( - cur_state.vel - - gravitational_const - * rolling_friction_const - * delta_time - * cur_state.vel.copy().normalize() + cur_state.vel - gravitational_const * rolling_friction_const * delta_time * cur_state.vel.copy().normalize() ) return ( cur_state.vel * delta_time - - 0.5 - * rolling_friction_const - * gravitational_const - * delta_time**2 - * cur_state.vel.copy().normalize() + - 0.5 * rolling_friction_const * gravitational_const * delta_time**2 * cur_state.vel.copy().normalize() ) displacement = 
vmath.Vector2(0, 0) @@ -103,14 +85,10 @@ def _get_relevant_ball_states_from_shot(self, shot: ff.Shot): for event in shot.getEventList(): event: ff.Event if event.getBall1() == self.number: - new_ball_event = _BallState.from_event_and_ball( - event, event.getBall1Data() - ) + new_ball_event = _BallState.from_event_and_ball(event, event.getBall1Data()) relevant_states.append(new_ball_event) elif event.getBall2() == self.number: - new_ball_event = _BallState.from_event_and_ball( - event, event.getBall2Data() - ) + new_ball_event = _BallState.from_event_and_ball(event, event.getBall2Data()) relevant_states.append(new_ball_event) return relevant_states diff --git a/src/fastfiz_env/wrappers/action.py b/src/fastfiz_env/wrappers/action.py index 3185abb..bb59399 100644 --- a/src/fastfiz_env/wrappers/action.py +++ b/src/fastfiz_env/wrappers/action.py @@ -123,10 +123,7 @@ def __init__( self.action_space_id = action_space_id self.action_space = self.SPACES[action_space_id.name] - def action( - self, action: np.ndarray[float, np.dtype[np.float32]] - ) -> np.ndarray[float, np.dtype[np.float32]]: - + def action(self, action: np.ndarray[float, np.dtype[np.float32]]) -> np.ndarray[float, np.dtype[np.float32]]: # Offset a and b are always 0 offset_a = 0 offset_b = 0 @@ -143,9 +140,7 @@ def action( phi = vec_to_abs_deg(vec_phi) vec_velocity = vec_length(vec_theta + vec_phi) - velocity = np.interp( - vec_velocity, (0, 2), (self.MIN_VELOCITY, self.MAX_VELOCITY) - ) + velocity = np.interp(vec_velocity, (0, 2), (self.MIN_VELOCITY, self.MAX_VELOCITY)) case ActionSpaces.VECTOR_2D: if np.allclose(action, 0): @@ -167,9 +162,7 @@ def action( r, theta, phi = spherical_coordinates(action) theta = np.interp(theta, (0, 360), (self.MIN_THETA, self.MAX_THETA)) phi = np.interp(phi, (0, 360), (self.MIN_PHI, self.MAX_PHI)) - velocity = np.interp( - r, (0, np.sqrt(3)), (self.MIN_VELOCITY, self.MAX_VELOCITY) - ) + velocity = np.interp(r, (0, np.sqrt(3)), (self.MIN_VELOCITY, self.MAX_VELOCITY)) case ActionSpaces.NO_OFFSET_5D: if np.allclose(action, 0): @@ -181,21 +174,15 @@ def action( vec_phi = action[2:4] phi = vec_to_abs_deg(vec_phi) - velocity = np.interp( - action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY) - ) + velocity = np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) case ActionSpaces.NORM_PARAMS_5D: theta = np.interp(action[2], (-1, 1), (self.MIN_THETA, self.MAX_THETA)) phi = np.interp(action[3], (-1, 1), (self.MIN_PHI, self.MAX_PHI)) - velocity = np.interp( - action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY) - ) + velocity = np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) case ActionSpaces.NO_OFFSET_NORM_PARAMS_3D: theta = np.interp(action[0], (-1, 1), (self.MIN_THETA, self.MAX_THETA)) phi = np.interp(action[1], (-1, 1), (self.MIN_PHI, self.MAX_PHI)) - velocity = np.interp( - action[2], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY) - ) + velocity = np.interp(action[2], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) action = np.array([offset_a, offset_b, theta, phi, velocity]) return action diff --git a/src/fastfiz_env/wrappers/time_limit_injection.py b/src/fastfiz_env/wrappers/time_limit_injection.py index 40a348d..c91072e 100644 --- a/src/fastfiz_env/wrappers/time_limit_injection.py +++ b/src/fastfiz_env/wrappers/time_limit_injection.py @@ -32,7 +32,5 @@ def __init__(self, env): elapsed_steps = get_wrapper_attr(self.env, gym.wrappers.TimeLimit, "_elapsed_steps") # type: ignore # Inject the max_episode_steps attribute into the base environment. 
- inject_attribute_into_base_env( - self.env, "_max_episode_steps", max_episode_steps - ) + inject_attribute_into_base_env(self.env, "_max_episode_steps", max_episode_steps) inject_attribute_into_base_env(self.env, "_elapsed_steps", elapsed_steps) diff --git a/src/optimize.py b/src/optimize.py index 0de08dc..af61169 100644 --- a/src/optimize.py +++ b/src/optimize.py @@ -28,26 +28,16 @@ def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]: :return: """ batch_size = trial.suggest_categorical("batch_size", [8, 16, 32, 64, 128, 256, 512]) - n_steps = trial.suggest_categorical( - "n_steps", [8, 16, 32, 64, 128, 256, 512, 1024, 2048] - ) - gamma = trial.suggest_categorical( - "gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999] - ) + n_steps = trial.suggest_categorical("n_steps", [8, 16, 32, 64, 128, 256, 512, 1024, 2048]) + gamma = trial.suggest_categorical("gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999]) learning_rate = trial.suggest_float("learning_rate", 1e-5, 1, log=True) ent_coef = trial.suggest_float("ent_coef", 0.00000001, 0.1, log=True) clip_range = trial.suggest_categorical("clip_range", [0.1, 0.2, 0.3, 0.4]) n_epochs = trial.suggest_categorical("n_epochs", [1, 5, 10, 20]) - gae_lambda = trial.suggest_categorical( - "gae_lambda", [0.8, 0.9, 0.92, 0.95, 0.98, 0.99, 1.0] - ) - max_grad_norm = trial.suggest_categorical( - "max_grad_norm", [0.3, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 5] - ) + gae_lambda = trial.suggest_categorical("gae_lambda", [0.8, 0.9, 0.92, 0.95, 0.98, 0.99, 1.0]) + max_grad_norm = trial.suggest_categorical("max_grad_norm", [0.3, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 5]) vf_coef = trial.suggest_float("vf_coef", 0, 1) - net_arch_type = trial.suggest_categorical( - "net_arch_type", ["tiny", "small", "medium"] - ) + net_arch_type = trial.suggest_categorical("net_arch_type", ["tiny", "small", "medium"]) # Uncomment for gSDE (continuous actions) # log_std_init = trial.suggest_float("log_std_init", -4, 1) # Uncomment for gSDE (continuous action) @@ -56,9 +46,7 @@ def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]: ortho_init = trial.suggest_categorical("ortho_init", [False]) # ortho_init = trial.suggest_categorical('ortho_init', [False, True]) # activation_fn = trial.suggest_categorical('activation_fn', ['tanh', 'relu', 'elu', 'leaky_relu']) - activation_fn_name = trial.suggest_categorical( - "activation_fn_name", ["tanh", "relu"] - ) + activation_fn_name = trial.suggest_categorical("activation_fn_name", ["tanh", "relu"]) # lr_schedule = "constant" # Uncomment to enable learning rate schedule # lr_schedule = trial.suggest_categorical('lr_schedule', ['linear', 'constant']) @@ -141,9 +129,7 @@ def objective( N_ENVS = 4 env = make_vec_env( - make_callable_wrapped_env( - env_id, num_balls, max_episode_steps, reward_function, **env_kwargs - ), + make_callable_wrapped_env(env_id, num_balls, max_episode_steps, reward_function, **env_kwargs), n_envs=N_ENVS, ) @@ -232,9 +218,7 @@ def __call__(self, parser, namespace, values, option_string=None): if __name__ == "__main__": parser = argparse.ArgumentParser(description="Description of your program") parser.add_argument("--n_trials", type=int, default=20, help="Number of trials") - parser.add_argument( - "--n_startup_trials", type=int, default=5, help="Number of startup trials" - ) + parser.add_argument("--n_startup_trials", type=int, default=5, help="Number of startup trials") parser.add_argument( "--reward", type=str, @@ -242,18 +226,10 @@ def __call__(self, parser, namespace, values, option_string=None): 
default="DefaultReward", help="Reward function", ) - parser.add_argument( - "--n_timesteps", type=int, default=int(5e5), help="Number of timesteps" - ) - parser.add_argument( - "--num-balls", type=int, default=2, help="Number of balls in the environment" - ) - parser.add_argument( - "--eval_freq", type=int, default=10_000, help="Evaluation frequency" - ) - parser.add_argument( - "--n_eval_episodes", type=int, default=100, help="Number of evaluation episodes" - ) + parser.add_argument("--n_timesteps", type=int, default=int(5e5), help="Number of timesteps") + parser.add_argument("--num-balls", type=int, default=2, help="Number of balls in the environment") + parser.add_argument("--eval_freq", type=int, default=10_000, help="Evaluation frequency") + parser.add_argument("--n_eval_episodes", type=int, default=100, help="Number of evaluation episodes") parser.add_argument( "--env_id", type=str, @@ -268,9 +244,7 @@ def __call__(self, parser, namespace, values, option_string=None): default=20, help="Max episode steps for the environment", ) - parser.add_argument( - "--no-logs", action="store_true", help="Disable Tensorboard logging" - ) + parser.add_argument("--no-logs", action="store_true", help="Disable Tensorboard logging") parser.add_argument( "--env-options", diff --git a/src/tests/utils/test_features.py b/src/tests/utils/test_features.py index 5203c73..8a8e2e5 100644 --- a/src/tests/utils/test_features.py +++ b/src/tests/utils/test_features.py @@ -4,7 +4,6 @@ class TestFeatures(unittest.TestCase): - def test_deg_to_vec(self): self.assertTrue(np.allclose(deg_to_vec(0), [1, 0])) self.assertTrue(np.allclose(deg_to_vec(90), [0, 1])) diff --git a/src/tests/utils/test_reward_functions.py b/src/tests/utils/test_reward_functions.py index fe6dc30..7e215e3 100644 --- a/src/tests/utils/test_reward_functions.py +++ b/src/tests/utils/test_reward_functions.py @@ -14,9 +14,7 @@ def weight_fn(num_balls: int, current_step: int, max_steps: int | None) -> float class TestRewardFunctions(unittest.TestCase): - possible_shot_action = np.array( - [0, 0, ff.TableState.MAX_THETA - 0.001, 0, 0], dtype=np.float64 - ) + possible_shot_action = np.array([0, 0, ff.TableState.MAX_THETA - 0.001, 0, 0], dtype=np.float64) impossible_shot_action = np.array([0, 0, 0, 0, 0], dtype=np.float64) empty_action = np.array([], dtype=np.float64) @@ -27,12 +25,8 @@ def test_step_pocketed_reward(self): reward = StepPocketedReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 0 - ) - self.assertEqual( - reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 0) + self.assertEqual(reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1) def test_game_won_reward(self): table_state = create_table_state(3) @@ -45,26 +39,17 @@ def test_game_won_reward(self): reward = GameWonReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 0 - ) - self.assertEqual( - reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 0) + self.assertEqual(reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1) def test_constant_reward(self): - table_state = create_table_state(2) reward = ConstantReward(weight=weight_fn, max_episode_steps=10) reward.reset(table_state) - self.assertEqual( - 
reward.get_reward(table_state, table_state, self.empty_action), 2 + 1 + 10 - ) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 2 + 2 + 10 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 2 + 1 + 10) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 2 + 2 + 10) def test_cue_ball_pocketed_reward(self): table_state = create_table_state(2) @@ -74,12 +59,8 @@ def test_cue_ball_pocketed_reward(self): reward = CueBallPocketedReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 0 - ) - self.assertEqual( - reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 0) + self.assertEqual(reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1) def test_cue_ball_not_moved_reward(self): table_state = create_table_state(2) @@ -89,27 +70,17 @@ def test_cue_ball_not_moved_reward(self): reward = CueBallNotMovedReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 1 - ) - self.assertEqual( - reward.get_reward(table_state, table_state_moved, self.empty_action), 0 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 1) + self.assertEqual(reward.get_reward(table_state, table_state_moved, self.empty_action), 0) def test_impossible_shot_reward(self): table_state = create_table_state(2) reward = ImpossibleShotReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.possible_shot_action), 0 - ) - self.assertEqual( - reward.get_reward(table_state, table_state, self.impossible_shot_action), 1 - ) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 1 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.possible_shot_action), 0) + self.assertEqual(reward.get_reward(table_state, table_state, self.impossible_shot_action), 1) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 1) def test_delta_best_total_distance_reward(self): # Pocket: [0., 1.118] @@ -121,9 +92,7 @@ def test_delta_best_total_distance_reward(self): table_state = create_table_state(2) table_state.setBall(1, ff.Ball.STATIONARY, ff.Point(0.25, 1.118)) - self.assertEqual( - reward.get_reward(prev_table_state, prev_table_state, self.empty_action), 0 - ) + self.assertEqual(reward.get_reward(prev_table_state, prev_table_state, self.empty_action), 0) self.assertEqual( reward.get_reward(prev_table_state, table_state, self.empty_action), 0.25, @@ -173,9 +142,7 @@ def test_combined_reward(self): table_state.setBall(1, ff.Ball.STATIONARY, ff.Point(0.25, 1.118)) self.assertEqual( - reward_function.get_reward( - prev_table_state, table_state, self.empty_action - ), + reward_function.get_reward(prev_table_state, table_state, self.empty_action), 3.3, ) @@ -184,22 +151,16 @@ def test_binary_reward_no_short_circuit(self): ImpossibleShotReward(10, short_circuit=False), ConstantReward(5), ] - reward_function = CombinedReward( - reward_functions=rewards_functions, short_circuit=True - ) + reward_function = CombinedReward(reward_functions=rewards_functions, short_circuit=True) table_state = create_table_state(2) reward_function.reset(table_state) self.assertEqual( - reward_function.get_reward( - table_state, table_state, self.possible_shot_action - ), 
+ reward_function.get_reward(table_state, table_state, self.possible_shot_action), 5, ) self.assertEqual( - reward_function.get_reward( - table_state, table_state, self.impossible_shot_action - ), + reward_function.get_reward(table_state, table_state, self.impossible_shot_action), 15, ) @@ -208,30 +169,22 @@ def test_binary_reward_short_circuit(self): ImpossibleShotReward(10, short_circuit=True), ConstantReward(5), ] - reward_function = CombinedReward( - reward_functions=rewards_functions, short_circuit=True - ) + reward_function = CombinedReward(reward_functions=rewards_functions, short_circuit=True) table_state = create_table_state(2) reward_function.reset(table_state) self.assertEqual( - reward_function.get_reward( - table_state, table_state, self.possible_shot_action - ), + reward_function.get_reward(table_state, table_state, self.possible_shot_action), 5, ) self.assertEqual( - reward_function.get_reward( - table_state, table_state, self.impossible_shot_action - ), + reward_function.get_reward(table_state, table_state, self.impossible_shot_action), 10, ) def test_weights(self): table_state = create_table_state(3) - reward = ConstantReward( - weight=NegativeConstantWeightMaxSteps, max_episode_steps=10 - ) + reward = ConstantReward(weight=NegativeConstantWeightMaxSteps, max_episode_steps=10) reward.reset(table_state) self.assertEqual( diff --git a/src/train.py b/src/train.py index 4254eb6..ee48585 100644 --- a/src/train.py +++ b/src/train.py @@ -44,9 +44,7 @@ def train( params: Optional[dict] = None, ) -> None: env = make_vec_env( - make_callable_wrapped_env( - env_id, num_balls, max_episode_steps, reward_function - ), + make_callable_wrapped_env(env_id, num_balls, max_episode_steps, reward_function), n_envs=n_envs, ) @@ -55,9 +53,7 @@ def train( model_name = get_model_name(env_id, num_balls) if model_dir is None: - model = PPO( - "MlpPolicy", env, verbose=1, tensorboard_log=logs_path, **hyperparams - ) + model = PPO("MlpPolicy", env, verbose=1, tensorboard_log=logs_path, **hyperparams) else: model = PPO.load(model_dir, env=env, verbose=1, tensorboard_log=logs_path) pretrained_name = model_dir.split("/")[-1].rsplit(".zip", 1)[0] @@ -145,9 +141,7 @@ def train( assert os.path.exists(logs_path), f"params path does not exist: {logs_path}" with open(params_path, "r") as fp: params = json.load(fp) - assert ( - "params" in params - ), "params file must have key 'params' with dict of hyperparameters" + assert "params" in params, "params file must have key 'params' with dict of hyperparameters" params = params["params"] print( From 1a5d2e02a98b889d41b2771ddfe7b957c6df5556 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 18:48:58 +0200 Subject: [PATCH 14/44] Lint with Ruff --- setup.py | 2 +- src/eval.py | 9 ++------- src/fastfiz_env/envs/frames_fastfiz.py | 2 -- src/fastfiz_env/envs/simple_fastfiz.py | 2 +- src/fastfiz_env/envs/testing_fastfiz.py | 2 -- src/fastfiz_env/make.py | 1 - src/fastfiz_env/reward_functions/binary_reward.py | 2 +- .../reward_functions/common/constant_reward.py | 2 +- src/fastfiz_env/reward_functions/common/weights.py | 1 - src/optimize.py | 2 -- src/tests/utils/test_reward_functions.py | 1 - src/train.py | 4 ++-- 12 files changed, 8 insertions(+), 22 deletions(-) diff --git a/setup.py b/setup.py index 1d8cae7..8ada4c4 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ import re -from setuptools import setup, find_packages +from setuptools import setup # with open("requirements.txt") as f: # requirements = f.read().splitlines() diff --git a/src/eval.py 
b/src/eval.py index 1ace5d3..7f64133 100644 --- a/src/eval.py +++ b/src/eval.py @@ -2,22 +2,17 @@ import os from fastfiz_renderer import GameHandler import numpy as np -import fastfiz_env -from fastfiz_env.envs import FramesFastFiz, SimpleFastFiz, PocketsFastFiz -from fastfiz_env.reward_functions import reward_function -from fastfiz_env.reward_functions.default_reward import DefaultReward +from fastfiz_env.envs import FramesFastFiz, PocketsFastFiz from fastfiz_env.utils.fastfiz import ( create_random_table_state, - get_ball_positions, normalize_ball_positions, ) from fastfiz_env.envs.utils import game_won, possible_shot from stable_baselines3 import PPO -from typing import Optional, Callable +from typing import Optional import argparse from fastfiz_env.wrappers.action import ActionSpaces, FastFizActionWrapper -from fastfiz_env.wrappers.utils import spherical_coordinates def get_play_config() -> dict: diff --git a/src/fastfiz_env/envs/frames_fastfiz.py b/src/fastfiz_env/envs/frames_fastfiz.py index d18f464..6b5a59d 100644 --- a/src/fastfiz_env/envs/frames_fastfiz.py +++ b/src/fastfiz_env/envs/frames_fastfiz.py @@ -5,10 +5,8 @@ from fastfiz_env.envs.utils import game_won, terminal_state from ..utils.fastfiz import ( - shot_params_from_action, get_ball_positions, create_random_table_state, - get_ball_velocity, normalize_ball_positions, normalize_ball_velocity, is_pocketed_state, diff --git a/src/fastfiz_env/envs/simple_fastfiz.py b/src/fastfiz_env/envs/simple_fastfiz.py index f1105d2..3de9ddf 100644 --- a/src/fastfiz_env/envs/simple_fastfiz.py +++ b/src/fastfiz_env/envs/simple_fastfiz.py @@ -4,7 +4,7 @@ import gymnasium as gym from gymnasium import spaces -from fastfiz_env.utils.fastfiz.fastfiz import num_balls_in_play, num_balls_pocketed +from fastfiz_env.utils.fastfiz.fastfiz import num_balls_pocketed from ..utils.fastfiz import ( create_random_table_state, diff --git a/src/fastfiz_env/envs/testing_fastfiz.py b/src/fastfiz_env/envs/testing_fastfiz.py index 2265180..67e35d5 100644 --- a/src/fastfiz_env/envs/testing_fastfiz.py +++ b/src/fastfiz_env/envs/testing_fastfiz.py @@ -10,8 +10,6 @@ create_random_table_state, get_ball_positions, normalize_ball_positions, - shot_params_from_action, - action_to_shot, shotparams_to_string, ) from ..reward_functions import RewardFunction, DefaultReward diff --git a/src/fastfiz_env/make.py b/src/fastfiz_env/make.py index 0b46333..0cc5260 100644 --- a/src/fastfiz_env/make.py +++ b/src/fastfiz_env/make.py @@ -1,4 +1,3 @@ -from typing import Optional from gymnasium.envs.registration import EnvSpec import gymnasium as gym diff --git a/src/fastfiz_env/reward_functions/binary_reward.py b/src/fastfiz_env/reward_functions/binary_reward.py index 86463fe..c6f127c 100644 --- a/src/fastfiz_env/reward_functions/binary_reward.py +++ b/src/fastfiz_env/reward_functions/binary_reward.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Callable, Optional, Union +from typing import Optional, Union import fastfiz as ff from .reward_function import RewardFunction, Weight import numpy as np diff --git a/src/fastfiz_env/reward_functions/common/constant_reward.py b/src/fastfiz_env/reward_functions/common/constant_reward.py index 0c763f1..11a3ece 100644 --- a/src/fastfiz_env/reward_functions/common/constant_reward.py +++ b/src/fastfiz_env/reward_functions/common/constant_reward.py @@ -1,4 +1,4 @@ -from ..reward_function import RewardFunction, Weight +from ..reward_function import RewardFunction import fastfiz as ff import numpy as np diff --git 
a/src/fastfiz_env/reward_functions/common/weights.py b/src/fastfiz_env/reward_functions/common/weights.py index 6b0622d..0590096 100644 --- a/src/fastfiz_env/reward_functions/common/weights.py +++ b/src/fastfiz_env/reward_functions/common/weights.py @@ -1,4 +1,3 @@ -from ..reward_function import Weight ConstantWeight = 1 diff --git a/src/optimize.py b/src/optimize.py index af61169..3ff5abf 100644 --- a/src/optimize.py +++ b/src/optimize.py @@ -6,14 +6,12 @@ import optuna import time import torch -import torch.nn as nn from fastfiz_env.make import make_callable_wrapped_env from fastfiz_env.reward_functions import DefaultReward, WinningReward, RewardFunction from optuna.pruners import MedianPruner from optuna.samplers import TPESampler from stable_baselines3 import PPO from stable_baselines3.common.callbacks import EvalCallback -from stable_baselines3.common.monitor import Monitor from stable_baselines3.common.env_util import make_vec_env from typing import Any, Dict from hyperparams import params_to_kwargs diff --git a/src/tests/utils/test_reward_functions.py b/src/tests/utils/test_reward_functions.py index 7e215e3..352aafc 100644 --- a/src/tests/utils/test_reward_functions.py +++ b/src/tests/utils/test_reward_functions.py @@ -2,7 +2,6 @@ import fastfiz as ff from fastfiz_env.reward_functions.common import * from fastfiz_env.reward_functions import CombinedReward -from fastfiz_env.reward_functions.reward_function import Weight from fastfiz_env.utils.fastfiz import create_table_state import numpy as np diff --git a/src/train.py b/src/train.py index ee48585..7157dcd 100644 --- a/src/train.py +++ b/src/train.py @@ -85,9 +85,9 @@ def train( tb_log_name=model_name, progress_bar=True, ) - print(f"Training finished.") + print("Training finished.") except KeyboardInterrupt: - print(f"Training interrupted.") + print("Training interrupted.") finally: model.save(model_path) print(f"Model saved: {model_path}") From 109eee58cfc711286e989037a291e3bca9064c3f Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 18:55:38 +0200 Subject: [PATCH 15/44] Fix ruff checks --- src/fastfiz_env/envs/frames_fastfiz.py | 35 +++--- src/fastfiz_env/envs/testing_fastfiz.py | 22 +++- .../reward_functions/default_reward.py | 14 ++- .../reward_functions/winning_reward.py | 13 ++- src/fastfiz_env/utils/fastfiz/fastfiz.py | 71 +++++++++--- src/tests/utils/test_reward_functions.py | 108 ++++++++++++++---- 6 files changed, 197 insertions(+), 66 deletions(-) diff --git a/src/fastfiz_env/envs/frames_fastfiz.py b/src/fastfiz_env/envs/frames_fastfiz.py index 6b5a59d..4f13845 100644 --- a/src/fastfiz_env/envs/frames_fastfiz.py +++ b/src/fastfiz_env/envs/frames_fastfiz.py @@ -26,7 +26,9 @@ class FramesFastFiz(gym.Env): TOTAL_BALLS = 16 # Including the cue ball num_balls = 2 - def __init__(self, reward_function: RewardFunction = DefaultReward, num_balls: int = 16) -> None: + def __init__( + self, reward_function: RewardFunction = DefaultReward, num_balls: int = 16 + ) -> None: super().__init__() if num_balls < 2: warnings.warn( @@ -43,11 +45,15 @@ def __init__(self, reward_function: RewardFunction = DefaultReward, num_balls: i def _max_episode_steps(self): if self.get_wrapper_attr("_time_limit_max_episode_steps") is not None: - self.max_episode_steps = self.get_wrapper_attr("_time_limit_max_episode_steps") + self.max_episode_steps = self.get_wrapper_attr( + "_time_limit_max_episode_steps" + ) print(f"Setting max episode steps to {self.max_episode_steps}") self.reward.max_episode_steps = self.max_episode_steps - def 
reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple[np.ndarray, dict]: + def reset( + self, *, seed: Optional[int] = None, options: Optional[dict] = None + ) -> tuple[np.ndarray, dict]: """ Reset the environment to its initial state. """ @@ -129,18 +135,7 @@ def _observation_space(self): All values are in the range `[0, TABLE_WIDTH]` and `[0, TABLE_LENGTH]`. """ - table = self.table_state.getTable() - lower = np.full((self.TOTAL_BALLS, 4), [-1, -1, -1, 0]) - # upper = np.full( - # (self.TOTAL_BALLS, 4), - # [ - # table.TABLE_WIDTH, - # table.TABLE_LENGTH, - # self.table_state.MAX_VELOCITY * 1.580, - # 1, - # ], - # ) upper = np.full( (self.TOTAL_BALLS, 4), [1, 1, 1, 1], @@ -179,9 +174,14 @@ def _possible_shot(self, shot_params: ff.ShotParams) -> bool: """ Check if the shot is possible. """ - return self.table_state.isPhysicallyPossible(shot_params) == ff.TableState.OK_PRECONDITION + return ( + self.table_state.isPhysicallyPossible(shot_params) + == ff.TableState.OK_PRECONDITION + ) - def _compute_observation(self, prev_table_state: ff.TableState, shot: Optional[ff.Shot]) -> np.ndarray: + def _compute_observation( + self, prev_table_state: ff.TableState, shot: Optional[ff.Shot] + ) -> np.ndarray: return self.compute_observation(prev_table_state, self.table_state, shot) @classmethod @@ -223,7 +223,8 @@ def compute_observation( pocketed = is_pocketed_state(gb.state) frames_seq[frame][gb.number] = [ *normalize_ball_positions((gb.position.x, gb.position.y)), # type: ignore - normalize_ball_velocity(np.hypot(gb.velocity.x, gb.velocity.y)) * 2 - 1, + normalize_ball_velocity(np.hypot(gb.velocity.x, gb.velocity.y)) * 2 + - 1, pocketed, ] return frames_seq diff --git a/src/fastfiz_env/envs/testing_fastfiz.py b/src/fastfiz_env/envs/testing_fastfiz.py index 67e35d5..cb39aac 100644 --- a/src/fastfiz_env/envs/testing_fastfiz.py +++ b/src/fastfiz_env/envs/testing_fastfiz.py @@ -66,10 +66,14 @@ def __init__( def _max_episode_steps(self): if self.get_wrapper_attr("_time_limit_max_episode_steps") is not None: - self.max_episode_steps = self.get_wrapper_attr("_time_limit_max_episode_steps") + self.max_episode_steps = self.get_wrapper_attr( + "_time_limit_max_episode_steps" + ) self.reward.max_episode_steps = self.max_episode_steps - def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple[np.ndarray, dict]: + def reset( + self, *, seed: Optional[int] = None, options: Optional[dict] = None + ) -> tuple[np.ndarray, dict]: super().reset(seed=seed) if self.max_episode_steps is None: @@ -90,7 +94,9 @@ def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) - observation = self._get_observation() info = self._get_info() - self.logger.info("Reset(%s) - initial observation:\n%s", self.n_episodes, observation) + self.logger.info( + "Reset(%s) - initial observation:\n%s", self.n_episodes, observation + ) self.logger.info("Reset(%s) - initial info: %s", self.n_episodes, info) self.n_episodes += 1 @@ -148,7 +154,9 @@ def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, bool, dict] def _get_observation(self): ball_positions = get_ball_positions(self.table_state)[: self.TOTAL_BALLS] # ball_positions = normalize_ball_positions(ball_positions) # Normalize to [0, 1] - ball_positions = normalize_ball_positions(ball_positions) * 2 - 1 # Normalize to [-1, 1] (symmetric) + ball_positions = ( + normalize_ball_positions(ball_positions) * 2 - 1 + ) # Normalize to [-1, 1] (symmetric) observation = np.zeros((self.TOTAL_BALLS, 2), 
dtype=np.float32) for i, ball_pos in enumerate(ball_positions): observation[i] = [*ball_pos] @@ -185,7 +193,6 @@ def _observation_space(self) -> spaces.Box: All values are in the range `[0, TABLE_WIDTH]` and `[0, TABLE_LENGTH]`. """ - table = self.table_state.getTable() lower = np.full((self.TOTAL_BALLS, 2), [-1, -1]) upper = np.full((self.TOTAL_BALLS, 2), [1, 1]) return spaces.Box(low=lower, high=upper, dtype=np.float32) @@ -214,4 +221,7 @@ def _possible_shot(self, shot_params: ff.ShotParams) -> bool: """ Check if the shot is possible. """ - return self.table_state.isPhysicallyPossible(shot_params) == ff.TableState.OK_PRECONDITION + return ( + self.table_state.isPhysicallyPossible(shot_params) + == ff.TableState.OK_PRECONDITION + ) diff --git a/src/fastfiz_env/reward_functions/default_reward.py b/src/fastfiz_env/reward_functions/default_reward.py index 5c1d647..fc0ce87 100644 --- a/src/fastfiz_env/reward_functions/default_reward.py +++ b/src/fastfiz_env/reward_functions/default_reward.py @@ -1,5 +1,17 @@ from .combined_reward import CombinedReward -from .common import * +from .common import ( + ConstantReward, + StepPocketedReward, + GameWonReward, + CueBallPocketedReward, + ConstantWeightBalls, + NegativeConstantWeightMaxSteps, + ConstantWeight, + NegativeConstantWeight, + ExponentialVelocityReward, + BallsNotMovedReward, +) + rewards = [ GameWonReward(ConstantWeight), diff --git a/src/fastfiz_env/reward_functions/winning_reward.py b/src/fastfiz_env/reward_functions/winning_reward.py index 235e93d..34994c1 100644 --- a/src/fastfiz_env/reward_functions/winning_reward.py +++ b/src/fastfiz_env/reward_functions/winning_reward.py @@ -1,5 +1,16 @@ from .combined_reward import CombinedReward -from .common import * +from .common import ( + ConstantReward, + StepPocketedReward, + GameWonReward, + CueBallPocketedReward, + ConstantWeightBalls, + NegativeConstantWeightMaxSteps, + ConstantWeight, + NegativeConstantWeight, + ExponentialVelocityReward, + StepNoBallsPocketedReward, +) rewards = [ GameWonReward(ConstantWeight), diff --git a/src/fastfiz_env/utils/fastfiz/fastfiz.py b/src/fastfiz_env/utils/fastfiz/fastfiz.py index d4c936c..78727f6 100644 --- a/src/fastfiz_env/utils/fastfiz/fastfiz.py +++ b/src/fastfiz_env/utils/fastfiz/fastfiz.py @@ -44,7 +44,13 @@ def num_balls_in_play(table_state: ff.TableState) -> int: Returns: int: The number of balls in play. """ - return len([i for i in range(table_state.getNumBalls()) if table_state.getBall(i).isInPlay()]) + return len( + [ + i + for i in range(table_state.getNumBalls()) + if table_state.getBall(i).isInPlay() + ] + ) def num_balls_pocketed( @@ -65,10 +71,14 @@ def num_balls_pocketed( int: The number of balls pocketed. """ stop = table_state.getNumBalls() if range_stop is None else range_stop - return len([i for i in range(range_start, stop) if table_state.getBall(i).isPocketed()]) + return len( + [i for i in range(range_start, stop) if table_state.getBall(i).isPocketed()] + ) -def any_ball_has_moved(prev_ball_positions: np.ndarray, ball_positions: np.ndarray) -> bool: +def any_ball_has_moved( + prev_ball_positions: np.ndarray, ball_positions: np.ndarray +) -> bool: """ Check if any ball has moved by comparing the previous ball positions with the current ball positions. 
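[Editorial note: the hunk above only reflows the signature of `any_ball_has_moved`; its body is not part of this patch. A minimal, hypothetical sketch of the comparison the docstring describes, assuming both arguments are NumPy arrays of (x, y) positions, could look like the following. It illustrates the documented behaviour only and is not the repository's actual implementation.

    import numpy as np

    def any_ball_has_moved(prev_ball_positions: np.ndarray, ball_positions: np.ndarray) -> bool:
        # A ball counts as moved if any coordinate differs beyond floating-point tolerance.
        return not np.allclose(prev_ball_positions, ball_positions)
]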
@@ -166,7 +176,9 @@ def distance_to_closest_pocket(ball_position: np.ndarray, pockets: np.ndarray) - return np.min(distance_to_pockets(ball_position, pockets)) -def distances_to_closest_pocket(ball_positions: np.ndarray, pockets: np.ndarray) -> np.ndarray: +def distances_to_closest_pocket( + ball_positions: np.ndarray, pockets: np.ndarray +) -> np.ndarray: """ Calculates the distances from each ball position to the closest pocket. @@ -177,7 +189,12 @@ def distances_to_closest_pocket(ball_positions: np.ndarray, pockets: np.ndarray) Returns: np.ndarray: An array of distances from each ball position to the closest pocket. """ - return np.array([distance_to_closest_pocket(ball_position, pockets) for ball_position in ball_positions]) + return np.array( + [ + distance_to_closest_pocket(ball_position, pockets) + for ball_position in ball_positions + ] + ) def create_table_state(n_balls: int) -> ff.TableState: @@ -206,7 +223,9 @@ def create_table_state(n_balls: int) -> ff.TableState: return table_state -def create_random_table_state(n_balls: int, seed: Optional[int] = None) -> ff.TableState: +def create_random_table_state( + n_balls: int, seed: Optional[int] = None +) -> ff.TableState: """ Creates a random table state with the specified number of balls. @@ -222,7 +241,9 @@ def create_random_table_state(n_balls: int, seed: Optional[int] = None) -> ff.Ta return table_state -def randomize_table_state(table_state: ff.TableState, seed: Optional[int] = None) -> None: +def randomize_table_state( + table_state: ff.TableState, seed: Optional[int] = None +) -> ff.TableState: """ Randomizes the positions of the balls on the pool table within the given table state. @@ -276,7 +297,9 @@ def randomize_table_state(table_state: ff.TableState, seed: Optional[int] = None return table_state -def map_action_to_shot_params(table_state: ff.TableState, action: np.ndarray) -> np.ndarray: +def map_action_to_shot_params( + table_state: ff.TableState, action: np.ndarray +) -> np.ndarray: """ Maps the given action values to the corresponding shot parameters within the specified ranges. @@ -288,15 +311,19 @@ def map_action_to_shot_params(table_state: ff.TableState, action: np.ndarray) -> np.ndarray: The mapped shot parameters. """ - a = np.interp(action[0], [0, 0], [0, 0]) - b = np.interp(action[1], [0, 0], [0, 0]) - theta = np.interp(action[2], [-1, 1], [table_state.MIN_THETA, table_state.MAX_THETA - 0.001]) + # a = np.interp(action[0], [0, 0], [0, 0]) + # b = np.interp(action[1], [0, 0], [0, 0]) + theta = np.interp( + action[2], [-1, 1], [table_state.MIN_THETA, table_state.MAX_THETA - 0.001] + ) phi = np.interp(action[3], [-1, 1], [0, 360]) v = np.interp(action[4], [-1, 1], [0, table_state.MAX_VELOCITY - 0.001]) return np.array([0, 0, theta, phi, v], dtype=np.float64) -def shot_params_from_action(table_state: ff.TableState, action: np.ndarray) -> ff.ShotParams: +def shot_params_from_action( + table_state: ff.TableState, action: np.ndarray +) -> ff.ShotParams: """ Converts an action into shot parameters. 
@@ -322,11 +349,21 @@ def action_to_shot(action: np.ndarray, action_space: spaces.Box) -> ff.ShotParam MAX_THETA = ff.TableState.MAX_THETA MAX_VELOCITY = ff.TableState.MAX_VELOCITY - a = np.interp(action[0], [action_space.low[0], action_space.high[0]], [MIN_OFFSET, MAX_OFFSET]) - b = np.interp(action[1], [action_space.low[1], action_space.high[1]], [MIN_OFFSET, MAX_OFFSET]) - theta = np.interp(action[2], [action_space.low[2], action_space.high[2]], [MIN_THETA, MAX_THETA]) - phi = np.interp(action[3], [action_space.low[3], action_space.high[3]], [MIN_PHI, MAX_PHI]) - velocity = np.interp(action[4], [action_space.low[4], action_space.high[4]], [0, MAX_VELOCITY]) + a = np.interp( + action[0], [action_space.low[0], action_space.high[0]], [MIN_OFFSET, MAX_OFFSET] + ) + b = np.interp( + action[1], [action_space.low[1], action_space.high[1]], [MIN_OFFSET, MAX_OFFSET] + ) + theta = np.interp( + action[2], [action_space.low[2], action_space.high[2]], [MIN_THETA, MAX_THETA] + ) + phi = np.interp( + action[3], [action_space.low[3], action_space.high[3]], [MIN_PHI, MAX_PHI] + ) + velocity = np.interp( + action[4], [action_space.low[4], action_space.high[4]], [0, MAX_VELOCITY] + ) # print(f"a: {a}, b: {b}, theta: {theta}, phi: {phi}, velocity: {velocity}") diff --git a/src/tests/utils/test_reward_functions.py b/src/tests/utils/test_reward_functions.py index 352aafc..5caf0ff 100644 --- a/src/tests/utils/test_reward_functions.py +++ b/src/tests/utils/test_reward_functions.py @@ -1,6 +1,20 @@ import unittest import fastfiz as ff -from fastfiz_env.reward_functions.common import * +from fastfiz_env.reward_functions.common import ( + ConstantReward, + StepPocketedReward, + GameWonReward, + CueBallPocketedReward, + CueBallNotMovedReward, + ImpossibleShotReward, + DeltaBestTotalDistanceReward, + TotalDistanceReward, + ConstantWeightBalls, + ConstantWeightMaxSteps, + NegativeConstantWeightMaxSteps, + ConstantWeightNumBalls, + ConstantWeightCurrentStep, +) from fastfiz_env.reward_functions import CombinedReward from fastfiz_env.utils.fastfiz import create_table_state import numpy as np @@ -13,7 +27,9 @@ def weight_fn(num_balls: int, current_step: int, max_steps: int | None) -> float class TestRewardFunctions(unittest.TestCase): - possible_shot_action = np.array([0, 0, ff.TableState.MAX_THETA - 0.001, 0, 0], dtype=np.float64) + possible_shot_action = np.array( + [0, 0, ff.TableState.MAX_THETA - 0.001, 0, 0], dtype=np.float64 + ) impossible_shot_action = np.array([0, 0, 0, 0, 0], dtype=np.float64) empty_action = np.array([], dtype=np.float64) @@ -24,8 +40,12 @@ def test_step_pocketed_reward(self): reward = StepPocketedReward() reward.reset(table_state) - self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 0) - self.assertEqual(reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1) + self.assertEqual( + reward.get_reward(table_state, table_state, self.empty_action), 0 + ) + self.assertEqual( + reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1 + ) def test_game_won_reward(self): table_state = create_table_state(3) @@ -38,8 +58,12 @@ def test_game_won_reward(self): reward = GameWonReward() reward.reset(table_state) - self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 0) - self.assertEqual(reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1) + self.assertEqual( + reward.get_reward(table_state, table_state, self.empty_action), 0 + ) + self.assertEqual( + reward.get_reward(table_state, 
table_state_pocketed, self.empty_action), 1 + ) def test_constant_reward(self): table_state = create_table_state(2) @@ -47,8 +71,12 @@ def test_constant_reward(self): reward = ConstantReward(weight=weight_fn, max_episode_steps=10) reward.reset(table_state) - self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 2 + 1 + 10) - self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 2 + 2 + 10) + self.assertEqual( + reward.get_reward(table_state, table_state, self.empty_action), 2 + 1 + 10 + ) + self.assertEqual( + reward.get_reward(table_state, table_state, self.empty_action), 2 + 2 + 10 + ) def test_cue_ball_pocketed_reward(self): table_state = create_table_state(2) @@ -58,8 +86,12 @@ def test_cue_ball_pocketed_reward(self): reward = CueBallPocketedReward() reward.reset(table_state) - self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 0) - self.assertEqual(reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1) + self.assertEqual( + reward.get_reward(table_state, table_state, self.empty_action), 0 + ) + self.assertEqual( + reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1 + ) def test_cue_ball_not_moved_reward(self): table_state = create_table_state(2) @@ -69,17 +101,27 @@ def test_cue_ball_not_moved_reward(self): reward = CueBallNotMovedReward() reward.reset(table_state) - self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 1) - self.assertEqual(reward.get_reward(table_state, table_state_moved, self.empty_action), 0) + self.assertEqual( + reward.get_reward(table_state, table_state, self.empty_action), 1 + ) + self.assertEqual( + reward.get_reward(table_state, table_state_moved, self.empty_action), 0 + ) def test_impossible_shot_reward(self): table_state = create_table_state(2) reward = ImpossibleShotReward() reward.reset(table_state) - self.assertEqual(reward.get_reward(table_state, table_state, self.possible_shot_action), 0) - self.assertEqual(reward.get_reward(table_state, table_state, self.impossible_shot_action), 1) - self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 1) + self.assertEqual( + reward.get_reward(table_state, table_state, self.possible_shot_action), 0 + ) + self.assertEqual( + reward.get_reward(table_state, table_state, self.impossible_shot_action), 1 + ) + self.assertEqual( + reward.get_reward(table_state, table_state, self.empty_action), 1 + ) def test_delta_best_total_distance_reward(self): # Pocket: [0., 1.118] @@ -91,7 +133,9 @@ def test_delta_best_total_distance_reward(self): table_state = create_table_state(2) table_state.setBall(1, ff.Ball.STATIONARY, ff.Point(0.25, 1.118)) - self.assertEqual(reward.get_reward(prev_table_state, prev_table_state, self.empty_action), 0) + self.assertEqual( + reward.get_reward(prev_table_state, prev_table_state, self.empty_action), 0 + ) self.assertEqual( reward.get_reward(prev_table_state, table_state, self.empty_action), 0.25, @@ -141,7 +185,9 @@ def test_combined_reward(self): table_state.setBall(1, ff.Ball.STATIONARY, ff.Point(0.25, 1.118)) self.assertEqual( - reward_function.get_reward(prev_table_state, table_state, self.empty_action), + reward_function.get_reward( + prev_table_state, table_state, self.empty_action + ), 3.3, ) @@ -150,16 +196,22 @@ def test_binary_reward_no_short_circuit(self): ImpossibleShotReward(10, short_circuit=False), ConstantReward(5), ] - reward_function = CombinedReward(reward_functions=rewards_functions, 
short_circuit=True) + reward_function = CombinedReward( + reward_functions=rewards_functions, short_circuit=True + ) table_state = create_table_state(2) reward_function.reset(table_state) self.assertEqual( - reward_function.get_reward(table_state, table_state, self.possible_shot_action), + reward_function.get_reward( + table_state, table_state, self.possible_shot_action + ), 5, ) self.assertEqual( - reward_function.get_reward(table_state, table_state, self.impossible_shot_action), + reward_function.get_reward( + table_state, table_state, self.impossible_shot_action + ), 15, ) @@ -168,22 +220,30 @@ def test_binary_reward_short_circuit(self): ImpossibleShotReward(10, short_circuit=True), ConstantReward(5), ] - reward_function = CombinedReward(reward_functions=rewards_functions, short_circuit=True) + reward_function = CombinedReward( + reward_functions=rewards_functions, short_circuit=True + ) table_state = create_table_state(2) reward_function.reset(table_state) self.assertEqual( - reward_function.get_reward(table_state, table_state, self.possible_shot_action), + reward_function.get_reward( + table_state, table_state, self.possible_shot_action + ), 5, ) self.assertEqual( - reward_function.get_reward(table_state, table_state, self.impossible_shot_action), + reward_function.get_reward( + table_state, table_state, self.impossible_shot_action + ), 10, ) def test_weights(self): table_state = create_table_state(3) - reward = ConstantReward(weight=NegativeConstantWeightMaxSteps, max_episode_steps=10) + reward = ConstantReward( + weight=NegativeConstantWeightMaxSteps, max_episode_steps=10 + ) reward.reset(table_state) self.assertEqual( From 2d3f7fa15d56ea072ae2a4adf00d4e75a7127616 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:43:48 +0200 Subject: [PATCH 16/44] Fix coordinate calculations --- src/fastfiz_env/wrappers/action.py | 191 ++++++++++++----------------- src/fastfiz_env/wrappers/utils.py | 53 ++++---- 2 files changed, 111 insertions(+), 133 deletions(-) diff --git a/src/fastfiz_env/wrappers/action.py b/src/fastfiz_env/wrappers/action.py index bb59399..cd869f0 100644 --- a/src/fastfiz_env/wrappers/action.py +++ b/src/fastfiz_env/wrappers/action.py @@ -1,8 +1,4 @@ -from .utils import ( - vec_to_abs_deg, - vec_length, - spherical_coordinates, -) +from .utils import vec_to_abs_deg, cart2sph, sph2deg from gymnasium import ActionWrapper from gymnasium import spaces import numpy as np @@ -10,60 +6,59 @@ class ActionSpaces(Enum): - NO_OFFSET_5D = (0,) - """No a and b offset, 5D representation of cue stick: - - a: 0 - - b: 0 - - theta: The angle of the shot in the yz-plane (0th and 1st element). - - phi: The angle of the in the xz-plane (2nd and 3rd element). - - velocity: 5th element. + VECTOR_2D = (0,) """ - NO_OFFSET_4D = (1,) - """No a and b offset, 4D representation of cue stick: - - a: 0 - - b: 0 - - theta: The angle of the shot in the yz-plane (0th and 1st element). - - phi: The angle of the in the xz-plane (2nd and 3rd element). - - velocity: Derived from the unit vector of theta + phi. + 2D vector representation of cue stick: + - a: Always 0. + - b: Always 11. + - theta: Always 20. + - phi: Angle between the 2D vector and the x-axis. + - velocity: Magnitude of the 2D vector. """ - - NO_OFFSET_3D = (2,) + VECTOR_3D = (1,) """ - No a and b offset, 3D representation of cue stick: - - a: 0 - - b: 0 - - theta: The angle of the shot in the yz-plane (0th and 1st element). - - phi: The angle of the in the xz-plane (1st and 2nd element). 
- - velocity: Derived from the 3D vector. + 3D vector representation of cue stick: + - a: Always 0. + - b: Always 0. + - theta: Derived from the 3D vector spherical coordinates. + - phi: Derived from the 3D vector spherical coordinates. + - velocity: Magnitude of the 3D vector. """ - - NORM_PARAMS_5D = (3,) + NORM_3D = (2,) """ - Normalized shot paramaters, 5D representation of cue stick: - - a: The offset of the cue ball in the x-coordinate. (Always 0) - - b: The offset of the cue ball in the y-coordinate. (Always 0) - - theta: The angle of the shot in the yz-plane. - - phi: The angle of the in the xz-plane. - - velocity: The velocity of the shot. + Normalized shot parameters, 3D representation of cue stick: + - a: Always 0. + - b: Always 0. + - theta: Normalized angle from `MIN_THETA` to `MAX_THETA`. + - phi: Normalized angle from `MIN_PHI` to `MAX_PHI`. + - velocity: Normalized velocity from `MIN_VELOCITY` to `MAX_VELOCITY`. """ - - NO_OFFSET_NORM_PARAMS_3D = (4,) + NORM_5D = (3,) """ Normalized shot parameters, 5D representation of cue stick: - - a: 0 - - b: 0 - - theta: The angle of the shot in the yz-plane. - - phi: The angle of the in the xz-plane. - - velocity: The velocity of the shot. + - a: Always 0. + - b: Always 0. + - theta: Normalized angle from `MIN_THETA` to `MAX_THETA`. + - phi: Normalized angle from `MIN_PHI` to `MAX_PHI`. + - velocity: Normalized velocity from `MIN_VELOCITY` to `MAX_VELOCITY`. """ - VECTOR_2D = (5,) + OFFSET_NORM_5D = (4,) """ - 2D vector + Normalized shot parameters, 5D representation of cue stick: + - a: Normalized value from `MIN_OFFSET` to `MAX_OFFSET`. + - b: Normalized value from `MIN_OFFSET` to `MAX_OFFSET`. + - theta: Normalized angle from `MIN_THETA` to `MAX_THETA`. + - phi: Normalized angle from `MIN_PHI` to `MAX_PHI`. + - velocity: Normalized velocity from `MIN_VELOCITY` to `MAX_VELOCITY`. """ - - OUTPUT = (6,) + OUTPUT_5D = (5,) """ - Output of FastFizActionWrapper. + 5D representation of cue stick, using original FastFiz shot parameter values: + - a: Offset of the cue ball in the x-coordinate. + - b: Offset of the cue ball in the y-coordinate. + - theta: The vertical angle of the shot. + - phi: The horizontal angle of the shot. + - velocity: The power of the shot (in m/s). 
""" @@ -71,39 +66,36 @@ class FastFizActionWrapper(ActionWrapper): MIN_THETA = 0 MAX_THETA = 70 - 0.001 MIN_PHI = 0 - MAX_PHI = 360 + MAX_PHI = 360 - 0.001 MIN_VELOCITY = 0 - MAX_VELOCITY = 10 + MAX_VELOCITY = 10 - 0.001 + MIN_OFFSET = -15 + MAX_OFFSET = 15 SPACES = { - "NO_OFFSET_3D": spaces.Box( - low=np.array([-1, -1, -1]), - high=np.array([1, 1, 1]), + "VECTOR_2D": spaces.Box( + low=np.array([-1, -1]), + high=np.array([1, 1]), + shape=(2,), dtype=np.float32, ), - "NO_OFFSET_4D": spaces.Box( - low=np.array([-1, -1, -1, -1]), - high=np.array([1, 1, 1, 1]), + "VECTOR_3D": spaces.Box( + low=np.array([-1, -1, -1]), + high=np.array([1, 1, 1]), dtype=np.float32, ), - "NO_OFFSET_5D": spaces.Box( - low=np.array([-1, -1, -1, -1, -1]), - high=np.array([1, 1, 1, 1, 1]), + "NORM_3D": spaces.Box( + low=np.array([-1, -1, -1]), + high=np.array([1, 1, 1]), dtype=np.float32, ), - "NORM_PARAMS_5D": spaces.Box( + "NORM_5D": spaces.Box( low=np.array([0, 0, -1, -1, -1]), high=np.array([0, 0, 1, 1, 1]), dtype=np.float32, ), - "NO_OFFSET_NORM_PARAMS_3D": spaces.Box( - low=np.array([-1, -1, -1]), - high=np.array([1, 1, 1]), - dtype=np.float32, - ), - "VECTOR_2D": spaces.Box( - low=np.array([-1, -1]), - high=np.array([1, 1]), - shape=(2,), + "OFFSET_NORM_5D": spaces.Box( + low=np.array([-1, -1, -1, -1, -1]), + high=np.array([1, 1, 1, 1, 1]), dtype=np.float32, ), "OUTPUT": spaces.Box( @@ -124,65 +116,42 @@ def __init__( self.action_space = self.SPACES[action_space_id.name] def action(self, action: np.ndarray[float, np.dtype[np.float32]]) -> np.ndarray[float, np.dtype[np.float32]]: - # Offset a and b are always 0 offset_a = 0 offset_b = 0 match self.action_space_id: - case ActionSpaces.NO_OFFSET_4D: - if np.allclose(action, 0): - return np.array([offset_a, offset_b, 0, 0, 0]) - vec_theta = action[:2] - theta = vec_to_abs_deg(vec_theta) - theta = np.interp(theta, (0, 360), (self.MIN_THETA, self.MAX_THETA)) - - vec_phi = action[2:4] - phi = vec_to_abs_deg(vec_phi) - - vec_velocity = vec_length(vec_theta + vec_phi) - velocity = np.interp(vec_velocity, (0, 2), (self.MIN_VELOCITY, self.MAX_VELOCITY)) - case ActionSpaces.VECTOR_2D: - if np.allclose(action, 0): - return np.array([offset_a, offset_b, 0, 0, 0]) theta = 20 - phi = np.degrees(np.arctan2(action[1], action[0])) % 360 - # phi = np.interp(theta, (0, 360), (self.MIN_PHI, self.MAX_PHI)) + phi = float(np.degrees(np.arctan2(action[1], action[0])) % self.MAX_PHI) offset_b = 11 - velocity = np.hypot(*action) velocity = np.interp( - velocity, + np.hypot(*action), (0, np.sqrt(2)), (self.MIN_VELOCITY, self.MAX_VELOCITY - 5), ) - - case ActionSpaces.NO_OFFSET_3D: - if np.allclose(action, 0): - return np.array([offset_a, offset_b, 0, 0, 0]) - r, theta, phi = spherical_coordinates(action) - theta = np.interp(theta, (0, 360), (self.MIN_THETA, self.MAX_THETA)) - phi = np.interp(phi, (0, 360), (self.MIN_PHI, self.MAX_PHI)) + case ActionSpaces.VECTOR_3D: + x, y, z = action + az, el, r = cart2sph(x, y, z) + phi, theta, r = sph2deg(az, el, r) + theta = float(np.interp(theta, (0, np.rad2deg(np.pi)), (self.MIN_THETA, self.MAX_THETA))) + phi = float(np.interp(phi, (0, np.rad2deg(2 * np.pi)), (self.MIN_PHI, self.MAX_PHI))) velocity = np.interp(r, (0, np.sqrt(3)), (self.MIN_VELOCITY, self.MAX_VELOCITY)) - - case ActionSpaces.NO_OFFSET_5D: - if np.allclose(action, 0): - return np.array([offset_a, offset_b, 0, 0, 0]) - vec_theta = action[:2] - theta = vec_to_abs_deg(vec_theta) - theta = np.interp(theta, (0, 360), (self.MIN_THETA, self.MAX_THETA)) - - vec_phi = action[2:4] - phi = 
vec_to_abs_deg(vec_phi) - + case ActionSpaces.NORM_3D: + theta = np.interp(action[0], (-1, 1), (self.MIN_THETA, self.MAX_THETA)) + phi = np.interp(action[1], (-1, 1), (self.MIN_PHI, self.MAX_PHI)) + velocity = np.interp(action[2], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) + case ActionSpaces.NORM_5D: + theta = np.interp(action[2], (-1, 1), (self.MIN_THETA, self.MAX_THETA)) + phi = np.interp(action[3], (-1, 1), (self.MIN_PHI, self.MAX_PHI)) velocity = np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) - case ActionSpaces.NORM_PARAMS_5D: + case ActionSpaces.OFFSET_NORM_5D: + offset_a = np.interp(action[0], (-1, 1), (self.MIN_OFFSET, self.MAX_OFFSET)) + offset_b = np.interp(action[1], (-1, 1), (self.MIN_OFFSET, self.MAX_OFFSET)) theta = np.interp(action[2], (-1, 1), (self.MIN_THETA, self.MAX_THETA)) phi = np.interp(action[3], (-1, 1), (self.MIN_PHI, self.MAX_PHI)) velocity = np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) - case ActionSpaces.NO_OFFSET_NORM_PARAMS_3D: - theta = np.interp(action[0], (-1, 1), (self.MIN_THETA, self.MAX_THETA)) - phi = np.interp(action[1], (-1, 1), (self.MIN_PHI, self.MAX_PHI)) - velocity = np.interp(action[2], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) + + velocity = np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) action = np.array([offset_a, offset_b, theta, phi, velocity]) return action diff --git a/src/fastfiz_env/wrappers/utils.py b/src/fastfiz_env/wrappers/utils.py index 5f68dca..9a89002 100644 --- a/src/fastfiz_env/wrappers/utils.py +++ b/src/fastfiz_env/wrappers/utils.py @@ -13,7 +13,7 @@ def vec_to_deg(vec: np.ndarray) -> float: """ Gets the angle of a vector. """ - return np.rad2deg(np.arctan2(vec[1], vec[0])) + return float(np.rad2deg(np.arctan2(vec[1], vec[0]))) def vec_to_abs_deg(vec: np.ndarray) -> float: @@ -23,42 +23,51 @@ def vec_to_abs_deg(vec: np.ndarray) -> float: return vec_to_deg(vec) % 360 -def vec_length(vec: np.ndarray) -> float: +def vec_mag(vec: np.ndarray) -> float: """ Gets the length of a vector. """ - return np.linalg.norm(vec) + return float(np.linalg.norm(vec)) -def vec_normalize(vec: np.ndarray) -> np.ndarray: +def vec_norm(vec: np.ndarray) -> np.ndarray: """ Gets the unit vector of a vector. """ - return vec / vec_length(vec) + return vec / vec_mag(vec) -def spherical_coordinates(vector: np.ndarray) -> tuple[float, float, float]: +def cart2sph(x: float, y: float, z: float) -> tuple[float, float, float]: """ - Converts a vector to spherical coordinates. + Convert Cartesian coordinates to spherical coordinates. + + Args: + x (float): x-coordinate. + y (float): y-coordinate. + z (float): z-coordinate. Returns: - r: float - The magnitude of the vector. - theta: float - The angle from the z-axis. - phi: float - The angle in the xy-plane. + tuple[float, float, float]: A tuple containing azimuth angle (in degrees), elevation angle (in degrees), and radius. """ + hxy: float = np.hypot(x, y) + r: float = np.hypot(hxy, z) + el: float = np.arctan2(z, hxy) + az: float = np.arctan2(y, x) + return az, el, r - assert len(vector) == 3, "Vector must have excatly 3 components." - Vx, Vy, Vz = vector - - theta = np.arccos(Vz / np.linalg.norm(vector)) - - phi = np.arctan2(Vy, Vx) # Using arctan2 to get correct quadrant - phi = (phi + 2 * np.pi) % (2 * np.pi) - - r = np.linalg.norm(vector) - return r, np.degrees(theta), np.degrees(phi) +def sph2deg(az: float, el: float, r: float) -> tuple[float, float, float]: + """ + Convert spherical coordinates to degrees. 
+ Args: + az (float): Azimuth angle in radians. + el (float): Elevation angle in radians. + r (float): Radius. -def vec_magnitude(vector): - return np.linalg.norm(vector) + Returns: + tuple[float, float, float]: A tuple containing azimuth angle (phi, in degrees), elevation angle (theta, in degrees), and radius. + """ + phi: float = np.rad2deg(az % (2 * np.pi)) + theta: float = np.rad2deg(el % np.pi) + return phi, theta, r From 5744e7438a979f06e2077c5a67112c90c2afe7e9 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:45:25 +0200 Subject: [PATCH 17/44] Remove TestingFastFiz env --- src/fastfiz_env/__init__.py | 7 - src/fastfiz_env/envs/__init__.py | 2 - src/fastfiz_env/envs/testing_fastfiz.py | 227 ------------------------ 3 files changed, 236 deletions(-) delete mode 100644 src/fastfiz_env/envs/testing_fastfiz.py diff --git a/src/fastfiz_env/__init__.py b/src/fastfiz_env/__init__.py index 2be325b..5f949cf 100644 --- a/src/fastfiz_env/__init__.py +++ b/src/fastfiz_env/__init__.py @@ -3,7 +3,6 @@ Avaliable environments: - `SimpleFastFiz-v0`: Observes the position of the balls. - - `TestingFastFiz-v0`: Observes the position of the balls. Used for testing purposes with options e.g. seed, logging, action_space_id. - `FramesFastFiz-v0`: Observes the position of the balls and the frames of the simulation. - `PocketsFastFiz-v0`: Observes the position of the balls and in play state. Pocketed balls position always corresponds to given pocket center. @@ -57,12 +56,6 @@ ) -register( - id="TestingFastFiz-v0", - entry_point="fastfiz_env.envs:TestingFastFiz", - additional_wrappers=(wrappers.TimeLimitInjectionWrapper.wrapper_spec(),), -) - register( id="FramesFastFiz-v0", entry_point="fastfiz_env.envs:FramesFastFiz", diff --git a/src/fastfiz_env/envs/__init__.py b/src/fastfiz_env/envs/__init__.py index b183270..f756e1e 100644 --- a/src/fastfiz_env/envs/__init__.py +++ b/src/fastfiz_env/envs/__init__.py @@ -4,14 +4,12 @@ from . 
import utils from .simple_fastfiz import SimpleFastFiz -from .testing_fastfiz import TestingFastFiz from .frames_fastfiz import FramesFastFiz from .pockets_fastfiz import PocketsFastFiz __all__ = [ "utils", "SimpleFastFiz", - "TestingFastFiz", "FramesFastFiz", "PocketsFastFiz", ] diff --git a/src/fastfiz_env/envs/testing_fastfiz.py b/src/fastfiz_env/envs/testing_fastfiz.py deleted file mode 100644 index cb39aac..0000000 --- a/src/fastfiz_env/envs/testing_fastfiz.py +++ /dev/null @@ -1,227 +0,0 @@ -import os -import numpy as np -import gymnasium as gym -from gymnasium import spaces -from typing import Optional -from ..wrappers import ActionSpaces, FastFizActionWrapper - -from fastfiz_env.utils.fastfiz.fastfiz import table_state_to_string -from ..utils.fastfiz import ( - create_random_table_state, - get_ball_positions, - normalize_ball_positions, - shotparams_to_string, -) -from ..reward_functions import RewardFunction, DefaultReward -import fastfiz as ff -import logging -import time - - -class TestingFastFiz(gym.Env): - """FastFiz environment for testing.""" - - TOTAL_BALLS = 16 - - def __init__( - self, - reward_function: RewardFunction = DefaultReward, - num_balls: int = 16, - *, - options: Optional[dict] = None, - ) -> None: - super().__init__() - self.options = options - self.num_balls = num_balls - self.table_state = create_random_table_state(self.num_balls) - self.observation_space = self._observation_space() - action_space_id = self.options.get("action_space_id", ActionSpaces.NO_OFFSET_3D) - self.action_space = FastFizActionWrapper.get_action_space(action_space_id) - self.max_episode_steps = None - self.reward = reward_function - - # Logging - self.logger = logging.getLogger(__name__) - logs_dir = self.options.get("logs_dir", "") - os.makedirs(logs_dir, exist_ok=True) - logging.basicConfig( - filename=os.path.join(logs_dir, f"{time.strftime('%m-%d_%H:%M:%S')}.log"), - filemode="a", - format="%(asctime)s - %(levelname)s - %(message)s", - datefmt="%H:%M:%S", - level=self.options.get("log_level", logging.INFO), - ) - - self.n_episodes = 0 - self.n_step = 0 - - self.logger.info( - "TestFastFiz initialized with:\n- balls: %s\n- rewards: %s\n- options: %s\n- action space: %s\n- observation space: %s", - self.num_balls, - self.reward, - self.options, - self.action_space, - self.observation_space, - ) - - def _max_episode_steps(self): - if self.get_wrapper_attr("_time_limit_max_episode_steps") is not None: - self.max_episode_steps = self.get_wrapper_attr( - "_time_limit_max_episode_steps" - ) - self.reward.max_episode_steps = self.max_episode_steps - - def reset( - self, *, seed: Optional[int] = None, options: Optional[dict] = None - ) -> tuple[np.ndarray, dict]: - super().reset(seed=seed) - - if self.max_episode_steps is None: - self._max_episode_steps() - - seed = self.options.get("seed", None) - self.logger.info("Reset(%s) - total n_steps: %s", self.n_episodes, self.n_step) - self.logger.info("Reset(%s) - table state seed: %s", self.n_episodes, seed) - self.table_state = create_random_table_state(self.num_balls, seed=seed) - self.reward.reset(self.table_state) - - self.logger.info( - "Reset(%s) - table state:\n%s", - self.n_episodes, - table_state_to_string(self.table_state), - ) - - observation = self._get_observation() - info = self._get_info() - - self.logger.info( - "Reset(%s) - initial observation:\n%s", self.n_episodes, observation - ) - self.logger.info("Reset(%s) - initial info: %s", self.n_episodes, info) - - self.n_episodes += 1 - self.n_step = 0 - - return observation, info - - 
def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, bool, dict]: - """ - Execute an action in the environment. - """ - - prev_table_state = ff.TableState(self.table_state) - # action_space = spaces.Box( - # low=np.array([0, 0, -1, -1, -1]), - # high=np.array([0, 0, 1, 1, 1]), - # dtype=np.float32, - # ) - # shot_params = action_to_shot([0, 0, *action], action_space) - - shot_params = ff.ShotParams(*action) - - self.logger.info( - "Step(%s) - Action:\n- action: %s\n- shot params: %s", - self.n_step, - action, - shotparams_to_string(shot_params), - ) - - impossible_shot = not self._possible_shot(shot_params) - - self.logger.info("Step(%s) - impossible shot: %s", self.n_step, impossible_shot) - - if not impossible_shot: - self.table_state.executeShot(shot_params) - - observation = self._get_observation() - - reward = self.reward.get_reward(prev_table_state, self.table_state, action) - - terminated = self._is_terminal_state() - truncated = False - info = self._get_info() - - self.logger.debug("Step(%s) - observation:\n%s", self.n_step, observation) - self.logger.info("Step(%s) - reward: %s", self.n_step, reward) - self.logger.info("Step(%s) - terminated: %s", self.n_step, terminated) - self.logger.debug("Step(%s) - truncated: %s", self.n_step, truncated) - self.logger.info("Step(%s) - info: %s", self.n_step, info) - - self.n_step += 1 - - return observation, reward, terminated, truncated, info - - def _get_observation(self): - ball_positions = get_ball_positions(self.table_state)[: self.TOTAL_BALLS] - # ball_positions = normalize_ball_positions(ball_positions) # Normalize to [0, 1] - ball_positions = ( - normalize_ball_positions(ball_positions) * 2 - 1 - ) # Normalize to [-1, 1] (symmetric) - observation = np.zeros((self.TOTAL_BALLS, 2), dtype=np.float32) - for i, ball_pos in enumerate(ball_positions): - observation[i] = [*ball_pos] - - return np.array(observation) - - def _get_info(self): - return { - "is_success": self._game_won(), - } - - def _is_terminal_state(self) -> bool: - if self.table_state.getBall(0).isPocketed(): - return True - - return self._game_won() - - def _game_won(self) -> bool: - if self.table_state.getBall(0).isPocketed(): - return False - - for i in range(1, self.num_balls): - if not self.table_state.getBall(i).isPocketed(): - return False - return True - - def _observation_space(self) -> spaces.Box: - """ - Get the observation space of the environment. - - The observation space is a 16-dimensional box with the position of each ball: - - x: The x-coordinate of the ball. - - y: The y-coordinate of the ball. - - All values are in the range `[0, TABLE_WIDTH]` and `[0, TABLE_LENGTH]`. - """ - lower = np.full((self.TOTAL_BALLS, 2), [-1, -1]) - upper = np.full((self.TOTAL_BALLS, 2), [1, 1]) - return spaces.Box(low=lower, high=upper, dtype=np.float32) - - def _action_space(self) -> spaces.Box: - """ - Get the action space of the environment. - - The action space is a 5-dimensional box: - - a-offset: The offset of the cue ball in the x-coordinate. - - b-offset: The offset of the cue ball in the y-coordinate. - - theta: The angle of the shot in the yz-plane. - - phi: The angle of the shot. - - velocity: The power of the shot. - - All values are in the range `[0, 1]`. - """ - # return spaces.Box( - # low=np.array([-1, -1, -1]), - # high=np.array([1, 1, 1]), - # dtype=np.float32, - # ) - return FastFizActionWrapper.get_action_space(ActionSpaces.NO_OFFSET_4D) - - def _possible_shot(self, shot_params: ff.ShotParams) -> bool: - """ - Check if the shot is possible. 
- """ - return ( - self.table_state.isPhysicallyPossible(shot_params) - == ff.TableState.OK_PRECONDITION - ) From dba9b07df58e239e6f23f3037ff19c65f33b53cd Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:45:59 +0200 Subject: [PATCH 18/44] Rename action space --- src/eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eval.py b/src/eval.py index 7f64133..342b78b 100644 --- a/src/eval.py +++ b/src/eval.py @@ -111,7 +111,7 @@ def main() -> None: # env_vec = fastfiz_env.make("SimpleFastFiz-v0", reward_function=DefaultReward) # env_vec = FastFizActionWrapper(env_vec, ActionSpaces.NO_OFFSET_3D) - env = FastFizActionWrapper(PocketsFastFiz, ActionSpaces.NO_OFFSET_3D) + env = FastFizActionWrapper(PocketsFastFiz, ActionSpaces.VECTOR_3D) agent = Agent(model, env) play(agent.decide_shot, balls=2, episodes=100) From d766fba73721f36c63c6cf718614872d094948a8 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:46:40 +0200 Subject: [PATCH 19/44] Rename action space --- src/fastfiz_env/make.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fastfiz_env/make.py b/src/fastfiz_env/make.py index 0cc5260..7fa8b2a 100644 --- a/src/fastfiz_env/make.py +++ b/src/fastfiz_env/make.py @@ -56,7 +56,7 @@ def make_wrapped_env( disable_env_checker=False, **kwargs, ) - env = FastFizActionWrapper(env, action_space_id=ActionSpaces.NO_OFFSET_3D) + env = FastFizActionWrapper(env, action_space_id=ActionSpaces.VECTOR_3D) return env From da7b09432f6e7ec46cb045e86bf030b8e70410db Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:47:39 +0200 Subject: [PATCH 20/44] Fix type hints --- src/fastfiz_env/reward_functions/combined_reward.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/fastfiz_env/reward_functions/combined_reward.py b/src/fastfiz_env/reward_functions/combined_reward.py index eb4bb4b..1bf24ca 100644 --- a/src/fastfiz_env/reward_functions/combined_reward.py +++ b/src/fastfiz_env/reward_functions/combined_reward.py @@ -1,3 +1,4 @@ +from typing import Optional from .reward_function import RewardFunction, Weight from .binary_reward import BinaryReward import fastfiz as ff @@ -12,7 +13,7 @@ class CombinedReward(RewardFunction): def __init__( self, weight: Weight = 1, - max_episode_steps: int = None, + max_episode_steps: Optional[int] = None, *, reward_functions: list[RewardFunction], short_circuit: bool = False, @@ -37,11 +38,11 @@ def __init__( self.max_episode_steps = max_episode_steps @property - def max_episode_steps(self) -> int: + def max_episode_steps(self) -> Optional[int]: return self._max_episode_steps @max_episode_steps.setter - def max_episode_steps(self, value: int) -> None: + def max_episode_steps(self, value: Optional[int]) -> None: self._max_episode_steps = value for reward in self.reward_functions: reward.max_episode_steps = value From d09391fa629730a09a92b2d68360205a56bd3fd0 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:50:42 +0200 Subject: [PATCH 21/44] Fix type hints --- src/fastfiz_env/wrappers/action.py | 44 +++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/fastfiz_env/wrappers/action.py b/src/fastfiz_env/wrappers/action.py index cd869f0..7e4dedc 100644 --- a/src/fastfiz_env/wrappers/action.py +++ b/src/fastfiz_env/wrappers/action.py @@ -116,18 +116,20 @@ def __init__( self.action_space = self.SPACES[action_space_id.name] def action(self, action: np.ndarray[float, np.dtype[np.float32]]) -> 
np.ndarray[float, np.dtype[np.float32]]: - offset_a = 0 - offset_b = 0 + offset_a = 0.0 + offset_b = 0.0 match self.action_space_id: case ActionSpaces.VECTOR_2D: - theta = 20 + theta = 20.0 phi = float(np.degrees(np.arctan2(action[1], action[0])) % self.MAX_PHI) - offset_b = 11 - velocity = np.interp( - np.hypot(*action), - (0, np.sqrt(2)), - (self.MIN_VELOCITY, self.MAX_VELOCITY - 5), + offset_b = 11.0 + velocity = float( + np.interp( + np.hypot(*action), + (0, np.sqrt(2)), + (self.MIN_VELOCITY, self.MAX_VELOCITY - 5), + ) ) case ActionSpaces.VECTOR_3D: x, y, z = action @@ -135,23 +137,21 @@ def action(self, action: np.ndarray[float, np.dtype[np.float32]]) -> np.ndarray[ phi, theta, r = sph2deg(az, el, r) theta = float(np.interp(theta, (0, np.rad2deg(np.pi)), (self.MIN_THETA, self.MAX_THETA))) phi = float(np.interp(phi, (0, np.rad2deg(2 * np.pi)), (self.MIN_PHI, self.MAX_PHI))) - velocity = np.interp(r, (0, np.sqrt(3)), (self.MIN_VELOCITY, self.MAX_VELOCITY)) + velocity = float(np.interp(r, (0, np.sqrt(3)), (self.MIN_VELOCITY, self.MAX_VELOCITY))) case ActionSpaces.NORM_3D: - theta = np.interp(action[0], (-1, 1), (self.MIN_THETA, self.MAX_THETA)) - phi = np.interp(action[1], (-1, 1), (self.MIN_PHI, self.MAX_PHI)) - velocity = np.interp(action[2], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) + theta = float(np.interp(action[0], (-1, 1), (self.MIN_THETA, self.MAX_THETA))) + phi = float(np.interp(action[1], (-1, 1), (self.MIN_PHI, self.MAX_PHI))) + velocity = float(np.interp(action[2], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY))) case ActionSpaces.NORM_5D: - theta = np.interp(action[2], (-1, 1), (self.MIN_THETA, self.MAX_THETA)) - phi = np.interp(action[3], (-1, 1), (self.MIN_PHI, self.MAX_PHI)) - velocity = np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) + theta = float(np.interp(action[2], (-1, 1), (self.MIN_THETA, self.MAX_THETA))) + phi = float(np.interp(action[3], (-1, 1), (self.MIN_PHI, self.MAX_PHI))) + velocity = float(np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY))) case ActionSpaces.OFFSET_NORM_5D: - offset_a = np.interp(action[0], (-1, 1), (self.MIN_OFFSET, self.MAX_OFFSET)) - offset_b = np.interp(action[1], (-1, 1), (self.MIN_OFFSET, self.MAX_OFFSET)) - theta = np.interp(action[2], (-1, 1), (self.MIN_THETA, self.MAX_THETA)) - phi = np.interp(action[3], (-1, 1), (self.MIN_PHI, self.MAX_PHI)) - velocity = np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) - - velocity = np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) + offset_a = float(np.interp(action[0], (-1, 1), (self.MIN_OFFSET, self.MAX_OFFSET))) + offset_b = float(np.interp(action[1], (-1, 1), (self.MIN_OFFSET, self.MAX_OFFSET))) + theta = float(np.interp(action[2], (-1, 1), (self.MIN_THETA, self.MAX_THETA))) + phi = float(np.interp(action[3], (-1, 1), (self.MIN_PHI, self.MAX_PHI))) + velocity = float(np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY))) action = np.array([offset_a, offset_b, theta, phi, velocity]) return action From e4239638e0bee9881fded936b08f2d955ce45164 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:57:06 +0200 Subject: [PATCH 22/44] Check binary reward function instance --- src/fastfiz_env/reward_functions/combined_reward.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fastfiz_env/reward_functions/combined_reward.py b/src/fastfiz_env/reward_functions/combined_reward.py index 1bf24ca..29e7140 100644 --- 
a/src/fastfiz_env/reward_functions/combined_reward.py +++ b/src/fastfiz_env/reward_functions/combined_reward.py @@ -70,12 +70,12 @@ def reward( float: The combined, weighted reward. """ - total_reward = 0 + total_reward = 0.0 for reward_function in self.reward_functions: reward = reward_function.get_reward(prev_table_state, table_state, action) total_reward += reward - if issubclass(reward_function.__class__, BinaryReward): + if isinstance(reward_function, BinaryReward): if reward == 1 * reward_function.weight() and self.short_circuit and reward_function.short_circuit: return total_reward From 8971d0a6a6dfcd01637206047662b3dcf40ba8fb Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:57:24 +0200 Subject: [PATCH 23/44] Remove type hint --- src/fastfiz_env/utils/fastfiz/renderer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fastfiz_env/utils/fastfiz/renderer.py b/src/fastfiz_env/utils/fastfiz/renderer.py index 1a44b29..9a64b3e 100644 --- a/src/fastfiz_env/utils/fastfiz/renderer.py +++ b/src/fastfiz_env/utils/fastfiz/renderer.py @@ -83,7 +83,6 @@ def _get_relevant_ball_states_from_shot(self, shot: ff.Shot): relevant_states: list[_BallState] = [] for event in shot.getEventList(): - event: ff.Event if event.getBall1() == self.number: new_ball_event = _BallState.from_event_and_ball(event, event.getBall1Data()) relevant_states.append(new_ball_event) From d8d53a560f603124d7392ca6e8768edc93c537b9 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:57:36 +0200 Subject: [PATCH 24/44] Fix type hints --- src/fastfiz_env/utils/fastfiz/fastfiz.py | 76 ++++++------------------ 1 file changed, 18 insertions(+), 58 deletions(-) diff --git a/src/fastfiz_env/utils/fastfiz/fastfiz.py b/src/fastfiz_env/utils/fastfiz/fastfiz.py index 78727f6..8c65aeb 100644 --- a/src/fastfiz_env/utils/fastfiz/fastfiz.py +++ b/src/fastfiz_env/utils/fastfiz/fastfiz.py @@ -30,8 +30,7 @@ def get_ball_positions(table_state: ff.TableState) -> np.ndarray: for i in range(table_state.getNumBalls()): pos = table_state.getBall(i).getPos() balls.append((pos.x, pos.y)) - balls = np.array(balls) - return balls + return np.array(balls) def num_balls_in_play(table_state: ff.TableState) -> int: @@ -44,13 +43,7 @@ def num_balls_in_play(table_state: ff.TableState) -> int: Returns: int: The number of balls in play. """ - return len( - [ - i - for i in range(table_state.getNumBalls()) - if table_state.getBall(i).isInPlay() - ] - ) + return len([i for i in range(table_state.getNumBalls()) if table_state.getBall(i).isInPlay()]) def num_balls_pocketed( @@ -71,14 +64,10 @@ def num_balls_pocketed( int: The number of balls pocketed. """ stop = table_state.getNumBalls() if range_stop is None else range_stop - return len( - [i for i in range(range_start, stop) if table_state.getBall(i).isPocketed()] - ) + return len([i for i in range(range_start, stop) if table_state.getBall(i).isPocketed()]) -def any_ball_has_moved( - prev_ball_positions: np.ndarray, ball_positions: np.ndarray -) -> bool: +def any_ball_has_moved(prev_ball_positions: np.ndarray, ball_positions: np.ndarray) -> bool: """ Check if any ball has moved by comparing the previous ball positions with the current ball positions. @@ -147,7 +136,7 @@ def distance_to_pocket(ball_position: np.ndarray, pocket: np.ndarray) -> float: Returns: float: The Euclidean distance between the ball position and the pocket. 
""" - return np.linalg.norm(pocket - ball_position) + return float(np.linalg.norm(pocket - ball_position)) def distance_to_pockets(ball_position: np.ndarray, pockets: np.ndarray) -> np.ndarray: @@ -176,9 +165,7 @@ def distance_to_closest_pocket(ball_position: np.ndarray, pockets: np.ndarray) - return np.min(distance_to_pockets(ball_position, pockets)) -def distances_to_closest_pocket( - ball_positions: np.ndarray, pockets: np.ndarray -) -> np.ndarray: +def distances_to_closest_pocket(ball_positions: np.ndarray, pockets: np.ndarray) -> np.ndarray: """ Calculates the distances from each ball position to the closest pocket. @@ -189,12 +176,7 @@ def distances_to_closest_pocket( Returns: np.ndarray: An array of distances from each ball position to the closest pocket. """ - return np.array( - [ - distance_to_closest_pocket(ball_position, pockets) - for ball_position in ball_positions - ] - ) + return np.array([distance_to_closest_pocket(ball_position, pockets) for ball_position in ball_positions]) def create_table_state(n_balls: int) -> ff.TableState: @@ -223,9 +205,7 @@ def create_table_state(n_balls: int) -> ff.TableState: return table_state -def create_random_table_state( - n_balls: int, seed: Optional[int] = None -) -> ff.TableState: +def create_random_table_state(n_balls: int, seed: Optional[int] = None) -> ff.TableState: """ Creates a random table state with the specified number of balls. @@ -241,9 +221,7 @@ def create_random_table_state( return table_state -def randomize_table_state( - table_state: ff.TableState, seed: Optional[int] = None -) -> ff.TableState: +def randomize_table_state(table_state: ff.TableState, seed: Optional[int] = None) -> ff.TableState: """ Randomizes the positions of the balls on the pool table within the given table state. @@ -297,9 +275,7 @@ def randomize_table_state( return table_state -def map_action_to_shot_params( - table_state: ff.TableState, action: np.ndarray -) -> np.ndarray: +def map_action_to_shot_params(table_state: ff.TableState, action: np.ndarray) -> np.ndarray: """ Maps the given action values to the corresponding shot parameters within the specified ranges. @@ -313,17 +289,13 @@ def map_action_to_shot_params( """ # a = np.interp(action[0], [0, 0], [0, 0]) # b = np.interp(action[1], [0, 0], [0, 0]) - theta = np.interp( - action[2], [-1, 1], [table_state.MIN_THETA, table_state.MAX_THETA - 0.001] - ) + theta = np.interp(action[2], [-1, 1], [table_state.MIN_THETA, table_state.MAX_THETA - 0.001]) phi = np.interp(action[3], [-1, 1], [0, 360]) v = np.interp(action[4], [-1, 1], [0, table_state.MAX_VELOCITY - 0.001]) return np.array([0, 0, theta, phi, v], dtype=np.float64) -def shot_params_from_action( - table_state: ff.TableState, action: np.ndarray -) -> ff.ShotParams: +def shot_params_from_action(table_state: ff.TableState, action: np.ndarray) -> ff.ShotParams: """ Converts an action into shot parameters. 
@@ -349,21 +321,11 @@ def action_to_shot(action: np.ndarray, action_space: spaces.Box) -> ff.ShotParam MAX_THETA = ff.TableState.MAX_THETA MAX_VELOCITY = ff.TableState.MAX_VELOCITY - a = np.interp( - action[0], [action_space.low[0], action_space.high[0]], [MIN_OFFSET, MAX_OFFSET] - ) - b = np.interp( - action[1], [action_space.low[1], action_space.high[1]], [MIN_OFFSET, MAX_OFFSET] - ) - theta = np.interp( - action[2], [action_space.low[2], action_space.high[2]], [MIN_THETA, MAX_THETA] - ) - phi = np.interp( - action[3], [action_space.low[3], action_space.high[3]], [MIN_PHI, MAX_PHI] - ) - velocity = np.interp( - action[4], [action_space.low[4], action_space.high[4]], [0, MAX_VELOCITY] - ) + a = np.interp(action[0], [action_space.low[0], action_space.high[0]], [MIN_OFFSET, MAX_OFFSET]) + b = np.interp(action[1], [action_space.low[1], action_space.high[1]], [MIN_OFFSET, MAX_OFFSET]) + theta = np.interp(action[2], [action_space.low[2], action_space.high[2]], [MIN_THETA, MAX_THETA]) + phi = np.interp(action[3], [action_space.low[3], action_space.high[3]], [MIN_PHI, MAX_PHI]) + velocity = np.interp(action[4], [action_space.low[4], action_space.high[4]], [0, MAX_VELOCITY]) # print(f"a: {a}, b: {b}, theta: {theta}, phi: {phi}, velocity: {velocity}") @@ -445,9 +407,7 @@ def get_ball_positions_id(table_state: ff.TableState) -> np.ndarray: ball = table_state.getBall(i) pos = ball.getPos() balls.append([ball.getID(), pos.x, pos.y]) - - balls = np.array(balls) - return balls + return np.array(balls) def is_pocketed_state(state: int) -> bool: From 5a4d634655abd6c1d0ca93f238469b7ad5032d10 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:57:46 +0200 Subject: [PATCH 25/44] Fix type hints --- src/fastfiz_env/reward_functions/common/velocity_reward.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/fastfiz_env/reward_functions/common/velocity_reward.py b/src/fastfiz_env/reward_functions/common/velocity_reward.py index ad21006..8ca998a 100644 --- a/src/fastfiz_env/reward_functions/common/velocity_reward.py +++ b/src/fastfiz_env/reward_functions/common/velocity_reward.py @@ -17,5 +17,4 @@ def reward( """ Reward function that gives a reward based on velocity of the action. 
""" - reward = np.interp(action[4], [0, 10], [0, 1]) - return reward + return float(np.interp(action[4], [0, 10], [0, 1])) From 35e5598ced85ff5d3d38f4f4f6b00b5b3137e931 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:58:48 +0200 Subject: [PATCH 26/44] Format with Ruff --- src/fastfiz_env/envs/frames_fastfiz.py | 24 +- .../reward_functions/common/weights.py | 2 - src/tes.ipynb | 260 ++++++++++++++++++ src/tests/utils/test_reward_functions.py | 92 ++----- 4 files changed, 289 insertions(+), 89 deletions(-) create mode 100644 src/tes.ipynb diff --git a/src/fastfiz_env/envs/frames_fastfiz.py b/src/fastfiz_env/envs/frames_fastfiz.py index 4f13845..b869cd4 100644 --- a/src/fastfiz_env/envs/frames_fastfiz.py +++ b/src/fastfiz_env/envs/frames_fastfiz.py @@ -26,9 +26,7 @@ class FramesFastFiz(gym.Env): TOTAL_BALLS = 16 # Including the cue ball num_balls = 2 - def __init__( - self, reward_function: RewardFunction = DefaultReward, num_balls: int = 16 - ) -> None: + def __init__(self, reward_function: RewardFunction = DefaultReward, num_balls: int = 16) -> None: super().__init__() if num_balls < 2: warnings.warn( @@ -45,15 +43,11 @@ def __init__( def _max_episode_steps(self): if self.get_wrapper_attr("_time_limit_max_episode_steps") is not None: - self.max_episode_steps = self.get_wrapper_attr( - "_time_limit_max_episode_steps" - ) + self.max_episode_steps = self.get_wrapper_attr("_time_limit_max_episode_steps") print(f"Setting max episode steps to {self.max_episode_steps}") self.reward.max_episode_steps = self.max_episode_steps - def reset( - self, *, seed: Optional[int] = None, options: Optional[dict] = None - ) -> tuple[np.ndarray, dict]: + def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple[np.ndarray, dict]: """ Reset the environment to its initial state. """ @@ -174,14 +168,9 @@ def _possible_shot(self, shot_params: ff.ShotParams) -> bool: """ Check if the shot is possible. 
""" - return ( - self.table_state.isPhysicallyPossible(shot_params) - == ff.TableState.OK_PRECONDITION - ) + return self.table_state.isPhysicallyPossible(shot_params) == ff.TableState.OK_PRECONDITION - def _compute_observation( - self, prev_table_state: ff.TableState, shot: Optional[ff.Shot] - ) -> np.ndarray: + def _compute_observation(self, prev_table_state: ff.TableState, shot: Optional[ff.Shot]) -> np.ndarray: return self.compute_observation(prev_table_state, self.table_state, shot) @classmethod @@ -223,8 +212,7 @@ def compute_observation( pocketed = is_pocketed_state(gb.state) frames_seq[frame][gb.number] = [ *normalize_ball_positions((gb.position.x, gb.position.y)), # type: ignore - normalize_ball_velocity(np.hypot(gb.velocity.x, gb.velocity.y)) * 2 - - 1, + normalize_ball_velocity(np.hypot(gb.velocity.x, gb.velocity.y)) * 2 - 1, pocketed, ] return frames_seq diff --git a/src/fastfiz_env/reward_functions/common/weights.py b/src/fastfiz_env/reward_functions/common/weights.py index 0590096..a2bd59d 100644 --- a/src/fastfiz_env/reward_functions/common/weights.py +++ b/src/fastfiz_env/reward_functions/common/weights.py @@ -1,5 +1,3 @@ - - ConstantWeight = 1 NegativeConstantWeight = -ConstantWeight diff --git a/src/tes.ipynb b/src/tes.ipynb new file mode 100644 index 0000000..507ccab --- /dev/null +++ b/src/tes.ipynb @@ -0,0 +1,260 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "def vec_length(vec: np.ndarray) -> float:\n", + " \"\"\"\n", + " Gets the length of a vector.\n", + " \"\"\"\n", + " return float(np.linalg.norm(vec))" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "def spherical_coordinates(vector: np.ndarray) -> tuple[float, float, float]:\n", + " \"\"\"\n", + " Converts a vector to spherical coordinates.\n", + "\n", + " Returns:\n", + " r: float - The magnitude of the vector.\n", + " theta: float - The angle from the z-axis.\n", + " phi: float - The angle in the xy-plane.\n", + " \"\"\"\n", + " assert len(vector) == 3, \"Vector must have excatly 3 components.\"\n", + " Vx, Vy, Vz = vector\n", + " r = vec_length(vector)\n", + " theta = np.rad2deg(np.arccos(Vz / r))\n", + " phi = np.arctan2(Vy, Vx) # Using arctan2 to get correct quadrant\n", + "\n", + " # phi = np.rad2deg((phi + 2 * np.pi) % (2 * np.pi))\n", + "\n", + " return r, theta, np.rad2deg(phi)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def cart2sph(x, y, z):\n", + " hxy = np.hypot(x, y)\n", + " r = np.hypot(hxy, z)\n", + " el = np.arctan2(z, hxy)\n", + " az = np.arctan2(y, x)\n", + " return az, el, r" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [], + "source": [ + "def cart2params(x, y, z):\n", + " az, el, r = cart2sph(x, y, z)\n", + " az = np.rad2deg(az)\n", + " el = np.rad2deg(el)\n", + " return r, el, az" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "def cartesian_to_spherical(x, y, z):\n", + " r = np.sqrt(x**2 + y**2 + z**2)\n", + " theta = np.rad2deg(np.arccos(z / r))\n", + " phi = np.rad2deg(np.arctan2(y, x))\n", + " return r, theta, phi" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "(1.044030650891055, 90.0, 16.69924423399362)\n", + "(1.044030650891055, 90.0, 16.69924423399362)\n", + "(1.044030650891055, 0.0, 16.69924423399362)\n" + ] + } + ], + "source": [ + "vec = np.array([1, 0.30, 0])\n", + "print(spherical_coordinates(vec))\n", + "print(cartesian_to_spherical(*vec))\n", + "print(cart2params(*vec))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1.4142135623730951, 45.00000000000001, 0.0)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spherical_coordinates(vec)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.0, 0.7853981633974483, 1.4142135623730951)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cart2sph(*vec)" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "metadata": {}, + "outputs": [], + "source": [ + "def cart2sph(x: float, y: float, z: float) -> tuple[float, float, float]:\n", + " \"\"\"\n", + " Convert Cartesian coordinates to spherical coordinates.\n", + "\n", + " Args:\n", + " x (float): x-coordinate.\n", + " y (float): y-coordinate.\n", + " z (float): z-coordinate.\n", + "\n", + " Returns:\n", + " tuple[float, float, float]: A tuple containing azimuth angle (in degrees), elevation angle (in degrees), and radius.\n", + " \"\"\"\n", + " hxy: float = np.hypot(x, y)\n", + " r: float = np.hypot(hxy, z)\n", + " el: float = np.arctan2(z, hxy)\n", + " az: float = np.arctan2(y, x)\n", + " return az, el, r\n", + "\n", + "\n", + "def sph2deg(az: float, el: float, r: float) -> tuple[float, float, float]:\n", + " \"\"\"\n", + " Convert spherical coordinates to degrees.\n", + "\n", + " Args:\n", + " az (float): Azimuth angle in radians.\n", + " el (float): Elevation angle in radians.\n", + " r (float): Radius.\n", + "\n", + " Returns:\n", + " tuple[float, float, float]: A tuple containing elevation angle (in degrees), azimuth angle (in degrees), and radius.\n", + " \"\"\"\n", + " theta: float = np.rad2deg(el) % 180\n", + " phi: float = np.rad2deg(az) % 360\n", + " return theta, phi, r" + ] + }, + { + "cell_type": "code", + "execution_count": 161, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(35.264389682754654, 45.0, 346.41016151377545)" + ] + }, + "execution_count": 161, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vec = np.array([200, 200, 200])\n", + "cart2sph(*vec)\n", + "sph2deg(*cart2sph(*vec))" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "180.0" + ] + }, + "execution_count": 123, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.rad2deg(np.pi)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/tests/utils/test_reward_functions.py b/src/tests/utils/test_reward_functions.py index 5caf0ff..dabe1df 100644 --- 
a/src/tests/utils/test_reward_functions.py +++ b/src/tests/utils/test_reward_functions.py @@ -27,9 +27,7 @@ def weight_fn(num_balls: int, current_step: int, max_steps: int | None) -> float class TestRewardFunctions(unittest.TestCase): - possible_shot_action = np.array( - [0, 0, ff.TableState.MAX_THETA - 0.001, 0, 0], dtype=np.float64 - ) + possible_shot_action = np.array([0, 0, ff.TableState.MAX_THETA - 0.001, 0, 0], dtype=np.float64) impossible_shot_action = np.array([0, 0, 0, 0, 0], dtype=np.float64) empty_action = np.array([], dtype=np.float64) @@ -40,12 +38,8 @@ def test_step_pocketed_reward(self): reward = StepPocketedReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 0 - ) - self.assertEqual( - reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 0) + self.assertEqual(reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1) def test_game_won_reward(self): table_state = create_table_state(3) @@ -58,12 +52,8 @@ def test_game_won_reward(self): reward = GameWonReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 0 - ) - self.assertEqual( - reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 0) + self.assertEqual(reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1) def test_constant_reward(self): table_state = create_table_state(2) @@ -71,12 +61,8 @@ def test_constant_reward(self): reward = ConstantReward(weight=weight_fn, max_episode_steps=10) reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 2 + 1 + 10 - ) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 2 + 2 + 10 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 2 + 1 + 10) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 2 + 2 + 10) def test_cue_ball_pocketed_reward(self): table_state = create_table_state(2) @@ -86,12 +72,8 @@ def test_cue_ball_pocketed_reward(self): reward = CueBallPocketedReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 0 - ) - self.assertEqual( - reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 0) + self.assertEqual(reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1) def test_cue_ball_not_moved_reward(self): table_state = create_table_state(2) @@ -101,27 +83,17 @@ def test_cue_ball_not_moved_reward(self): reward = CueBallNotMovedReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 1 - ) - self.assertEqual( - reward.get_reward(table_state, table_state_moved, self.empty_action), 0 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 1) + self.assertEqual(reward.get_reward(table_state, table_state_moved, self.empty_action), 0) def test_impossible_shot_reward(self): table_state = create_table_state(2) reward = ImpossibleShotReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.possible_shot_action), 0 - 
) - self.assertEqual( - reward.get_reward(table_state, table_state, self.impossible_shot_action), 1 - ) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 1 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.possible_shot_action), 0) + self.assertEqual(reward.get_reward(table_state, table_state, self.impossible_shot_action), 1) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 1) def test_delta_best_total_distance_reward(self): # Pocket: [0., 1.118] @@ -133,9 +105,7 @@ def test_delta_best_total_distance_reward(self): table_state = create_table_state(2) table_state.setBall(1, ff.Ball.STATIONARY, ff.Point(0.25, 1.118)) - self.assertEqual( - reward.get_reward(prev_table_state, prev_table_state, self.empty_action), 0 - ) + self.assertEqual(reward.get_reward(prev_table_state, prev_table_state, self.empty_action), 0) self.assertEqual( reward.get_reward(prev_table_state, table_state, self.empty_action), 0.25, @@ -185,9 +155,7 @@ def test_combined_reward(self): table_state.setBall(1, ff.Ball.STATIONARY, ff.Point(0.25, 1.118)) self.assertEqual( - reward_function.get_reward( - prev_table_state, table_state, self.empty_action - ), + reward_function.get_reward(prev_table_state, table_state, self.empty_action), 3.3, ) @@ -196,22 +164,16 @@ def test_binary_reward_no_short_circuit(self): ImpossibleShotReward(10, short_circuit=False), ConstantReward(5), ] - reward_function = CombinedReward( - reward_functions=rewards_functions, short_circuit=True - ) + reward_function = CombinedReward(reward_functions=rewards_functions, short_circuit=True) table_state = create_table_state(2) reward_function.reset(table_state) self.assertEqual( - reward_function.get_reward( - table_state, table_state, self.possible_shot_action - ), + reward_function.get_reward(table_state, table_state, self.possible_shot_action), 5, ) self.assertEqual( - reward_function.get_reward( - table_state, table_state, self.impossible_shot_action - ), + reward_function.get_reward(table_state, table_state, self.impossible_shot_action), 15, ) @@ -220,30 +182,22 @@ def test_binary_reward_short_circuit(self): ImpossibleShotReward(10, short_circuit=True), ConstantReward(5), ] - reward_function = CombinedReward( - reward_functions=rewards_functions, short_circuit=True - ) + reward_function = CombinedReward(reward_functions=rewards_functions, short_circuit=True) table_state = create_table_state(2) reward_function.reset(table_state) self.assertEqual( - reward_function.get_reward( - table_state, table_state, self.possible_shot_action - ), + reward_function.get_reward(table_state, table_state, self.possible_shot_action), 5, ) self.assertEqual( - reward_function.get_reward( - table_state, table_state, self.impossible_shot_action - ), + reward_function.get_reward(table_state, table_state, self.impossible_shot_action), 10, ) def test_weights(self): table_state = create_table_state(3) - reward = ConstantReward( - weight=NegativeConstantWeightMaxSteps, max_episode_steps=10 - ) + reward = ConstantReward(weight=NegativeConstantWeightMaxSteps, max_episode_steps=10) reward.reset(table_state) self.assertEqual( From 1558c51779b9af2a443191c4904128703a2e4a2c Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 23:02:10 +0200 Subject: [PATCH 27/44] Rename test --- src/tests/utils/test_features.py | 27 --------------------------- src/tests/utils/test_wrappers.py | 22 ++++++++++++++++++++++ 2 files changed, 22 insertions(+), 27 deletions(-) delete mode 100644 
src/tests/utils/test_features.py create mode 100644 src/tests/utils/test_wrappers.py diff --git a/src/tests/utils/test_features.py b/src/tests/utils/test_features.py deleted file mode 100644 index 8a8e2e5..0000000 --- a/src/tests/utils/test_features.py +++ /dev/null @@ -1,27 +0,0 @@ -import unittest -from fastfiz_env.wrappers.utils import deg_to_vec, vec_to_deg, vec_to_abs_deg -import numpy as np - - -class TestFeatures(unittest.TestCase): - def test_deg_to_vec(self): - self.assertTrue(np.allclose(deg_to_vec(0), [1, 0])) - self.assertTrue(np.allclose(deg_to_vec(90), [0, 1])) - self.assertTrue(np.allclose(deg_to_vec(180), [-1, 0])) - self.assertTrue(np.allclose(deg_to_vec(270), [0, -1])) - - def test_vec_to_abs_deg(self): - self.assertEqual(vec_to_abs_deg([1, 0]), 0) - self.assertEqual(vec_to_abs_deg([0, 1]), 90) - self.assertEqual(vec_to_abs_deg([-1, 0]), 180) - self.assertEqual(vec_to_abs_deg([0, -1]), 270) - - def test_vec_to_deg(self): - self.assertEqual(vec_to_deg([1, 0]), 0) - self.assertEqual(vec_to_deg([0, 1]), 90) - self.assertEqual(vec_to_deg([-1, 0]), 180) - self.assertEqual(vec_to_deg([0, -1]), -90) - - -if __name__ == "__main__": - unittest.main() diff --git a/src/tests/utils/test_wrappers.py b/src/tests/utils/test_wrappers.py new file mode 100644 index 0000000..99062d3 --- /dev/null +++ b/src/tests/utils/test_wrappers.py @@ -0,0 +1,22 @@ +import unittest +from fastfiz_env.wrappers.utils import cart2sph, sph2deg + + +class TestFeatures(unittest.TestCase): + def test_cart2sph(self): + x, y, z = 1, 1, 1 + az, el, r = cart2sph(x, y, z) + self.assertAlmostEqual(az, 0.7853981633974483) + self.assertAlmostEqual(el, 0.6154797086703873) + self.assertAlmostEqual(r, 1.7320508075688772) + + def test_sph2deg(self): + az, el, r = 0.7853981633974483, 0.6154797086703873, 1.7320508075688772 + phi, theta, r = sph2deg(az, el, r) + self.assertAlmostEqual(phi, 45.0) + self.assertAlmostEqual(theta, 35.26438968275466) + self.assertAlmostEqual(r, 1.7320508075688772) + + +if __name__ == "__main__": + unittest.main() From 6e627a276e7b3efc09dab9fcd3cd90cfddc5abd6 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 23:02:24 +0200 Subject: [PATCH 28/44] Remove unused utils --- src/fastfiz_env/wrappers/action.py | 2 +- src/fastfiz_env/wrappers/utils.py | 36 ------------------------------ 2 files changed, 1 insertion(+), 37 deletions(-) diff --git a/src/fastfiz_env/wrappers/action.py b/src/fastfiz_env/wrappers/action.py index 7e4dedc..7db9aa9 100644 --- a/src/fastfiz_env/wrappers/action.py +++ b/src/fastfiz_env/wrappers/action.py @@ -1,4 +1,4 @@ -from .utils import vec_to_abs_deg, cart2sph, sph2deg +from .utils import cart2sph, sph2deg from gymnasium import ActionWrapper from gymnasium import spaces import numpy as np diff --git a/src/fastfiz_env/wrappers/utils.py b/src/fastfiz_env/wrappers/utils.py index 9a89002..caa277e 100644 --- a/src/fastfiz_env/wrappers/utils.py +++ b/src/fastfiz_env/wrappers/utils.py @@ -1,42 +1,6 @@ import numpy as np -def deg_to_vec(deg: float) -> np.ndarray: - """ - Gets the vector of an angle. - """ - rad = np.deg2rad(deg) - return np.array([np.cos(rad), np.sin(rad)], dtype=np.float32) - - -def vec_to_deg(vec: np.ndarray) -> float: - """ - Gets the angle of a vector. - """ - return float(np.rad2deg(np.arctan2(vec[1], vec[0]))) - - -def vec_to_abs_deg(vec: np.ndarray) -> float: - """ - Gets the absolute angle of a vector. - """ - return vec_to_deg(vec) % 360 - - -def vec_mag(vec: np.ndarray) -> float: - """ - Gets the length of a vector. 
- """ - return float(np.linalg.norm(vec)) - - -def vec_norm(vec: np.ndarray) -> np.ndarray: - """ - Gets the unit vector of a vector. - """ - return vec / vec_mag(vec) - - def cart2sph(x: float, y: float, z: float) -> tuple[float, float, float]: """ Convert Cartesian coordinates to spherical coordinates. From f57da5ae428c239a8a2479bd08b99329acb8c620 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 23:25:03 +0200 Subject: [PATCH 29/44] Fix workflow --- .github/workflows/{lint.yml => ci.yml} | 19 ++++++++---- .github/workflows/python-package.yml | 40 -------------------------- 2 files changed, 14 insertions(+), 45 deletions(-) rename .github/workflows/{lint.yml => ci.yml} (66%) delete mode 100644 .github/workflows/python-package.yml diff --git a/.github/workflows/lint.yml b/.github/workflows/ci.yml similarity index 66% rename from .github/workflows/lint.yml rename to .github/workflows/ci.yml index 0405eb7..5d16d87 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/ci.yml @@ -1,12 +1,11 @@ -name: Lint Python package - +name: Python package on: push: pull_request: branches: ['main'] jobs: - lint: + build: runs-on: ubuntu-latest strategy: fail-fast: false @@ -14,7 +13,6 @@ jobs: python-version: ['3.10', '3.11'] steps: - uses: actions/checkout@v4 - - uses: chartboost/ruff-action@v1 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: @@ -22,9 +20,20 @@ jobs: cache: 'pip' - name: Install dependencies run: | - sudo apt-get install python3-opengl + sudo apt update + sudo apt install python3-opengl swig libgsl-dev python -m pip install --upgrade pip pip install ".[test]" + + - name: Lint with Ruff + run: | + ruff check src/fastfiz_env + ruff format src/fastfiz_env + - name: Run MyPy run: | mypy src/fastfiz_env + + - name: Test with pytest + run: | + pytest diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml deleted file mode 100644 index cbbbaa6..0000000 --- a/.github/workflows/python-package.yml +++ /dev/null @@ -1,40 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python - -name: Python package - -on: - push: - pull_request: - branches: ['main'] - -jobs: - build: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: ['3.10', '3.11'] - - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - - name: Install dependencies - run: | - sudo apt-get install python3-opengl - python -m pip install --upgrade pip - python -m pip install flake8 pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest - run: | - pytest From b2d3ae3f53f96ca98d620bb78bc0fd6de4ba8244 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 23:25:54 +0200 Subject: [PATCH 30/44] Fix workflow --- .github/workflows/test.yml | 34 ---------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml deleted file mode 100644 index 7287df4..0000000 --- a/.github/workflows/test.yml +++ /dev/null @@ -1,34 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python - -name: Test Python package - -on: - push: - pull_request: - branches: ['main'] - -jobs: - test: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: ['3.10', '3.11'] - - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - - name: Install dependencies - run: | - sudo apt update - sudo apt install python3-opengl swig libgsl-dev - python -m pip install --upgrade pip - pip install ".[test]" - - name: Test with pytest - run: | - pytest From 59ebf1a893e9e3a8866410fe4e2e2294d9129dfd Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 23:35:01 +0200 Subject: [PATCH 31/44] Remove requirements.txt --- requirements.txt | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index ab5aac4..0000000 --- a/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -fastfiz @ git+https://github.com/P6-Pool/fastfiz.git@2af8aed22bec1faeb5ac92b98b0751a0023f3fb7 -fastfiz_renderer @ git+https://github.com/P6-Pool/fastfiz-renderer.git@4ffb95e8683b30975b1d8f5dd483d56599e1e062 -p5 @ git+https://github.com/P6-Pool/p5.git@19b96bc00be29d624982c6ecee23fba104457277 -gymnasium==0.29.1 -stable-baselines3==2.2.1 -tensorboard==2.16.2 -tensorflow==2.16.1 -vectormath==0.2.2 -optuna==3.6.1 From 72fdd2cf6772944a5a7314439ae4a3684060a068 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 23:37:51 +0200 Subject: [PATCH 32/44] Remove .vscode --- .vscode/settings.json | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 1b59914..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "python.analysis.typeCheckingMode": "basic", - "python.analysis.autoImportCompletions": true -} From f418e1e387815c993a88f3c7550a69bf3631806b Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 23:53:37 +0200 Subject: [PATCH 33/44] Remove unused --- src/tes.ipynb | 260 -------------------------------------------------- 1 file changed, 260 deletions(-) delete mode 100644 src/tes.ipynb diff --git a/src/tes.ipynb b/src/tes.ipynb deleted file mode 100644 index 507ccab..0000000 --- a/src/tes.ipynb +++ /dev/null @@ -1,260 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "def vec_length(vec: 
np.ndarray) -> float:\n", - " \"\"\"\n", - " Gets the length of a vector.\n", - " \"\"\"\n", - " return float(np.linalg.norm(vec))" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [], - "source": [ - "def spherical_coordinates(vector: np.ndarray) -> tuple[float, float, float]:\n", - " \"\"\"\n", - " Converts a vector to spherical coordinates.\n", - "\n", - " Returns:\n", - " r: float - The magnitude of the vector.\n", - " theta: float - The angle from the z-axis.\n", - " phi: float - The angle in the xy-plane.\n", - " \"\"\"\n", - " assert len(vector) == 3, \"Vector must have excatly 3 components.\"\n", - " Vx, Vy, Vz = vector\n", - " r = vec_length(vector)\n", - " theta = np.rad2deg(np.arccos(Vz / r))\n", - " phi = np.arctan2(Vy, Vx) # Using arctan2 to get correct quadrant\n", - "\n", - " # phi = np.rad2deg((phi + 2 * np.pi) % (2 * np.pi))\n", - "\n", - " return r, theta, np.rad2deg(phi)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "def cart2sph(x, y, z):\n", - " hxy = np.hypot(x, y)\n", - " r = np.hypot(hxy, z)\n", - " el = np.arctan2(z, hxy)\n", - " az = np.arctan2(y, x)\n", - " return az, el, r" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [], - "source": [ - "def cart2params(x, y, z):\n", - " az, el, r = cart2sph(x, y, z)\n", - " az = np.rad2deg(az)\n", - " el = np.rad2deg(el)\n", - " return r, el, az" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [], - "source": [ - "def cartesian_to_spherical(x, y, z):\n", - " r = np.sqrt(x**2 + y**2 + z**2)\n", - " theta = np.rad2deg(np.arccos(z / r))\n", - " phi = np.rad2deg(np.arctan2(y, x))\n", - " return r, theta, phi" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(1.044030650891055, 90.0, 16.69924423399362)\n", - "(1.044030650891055, 90.0, 16.69924423399362)\n", - "(1.044030650891055, 0.0, 16.69924423399362)\n" - ] - } - ], - "source": [ - "vec = np.array([1, 0.30, 0])\n", - "print(spherical_coordinates(vec))\n", - "print(cartesian_to_spherical(*vec))\n", - "print(cart2params(*vec))\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(1.4142135623730951, 45.00000000000001, 0.0)" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "spherical_coordinates(vec)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(0.0, 0.7853981633974483, 1.4142135623730951)" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cart2sph(*vec)" - ] - }, - { - "cell_type": "code", - "execution_count": 155, - "metadata": {}, - "outputs": [], - "source": [ - "def cart2sph(x: float, y: float, z: float) -> tuple[float, float, float]:\n", - " \"\"\"\n", - " Convert Cartesian coordinates to spherical coordinates.\n", - "\n", - " Args:\n", - " x (float): x-coordinate.\n", - " y (float): y-coordinate.\n", - " z (float): z-coordinate.\n", - "\n", - " Returns:\n", - " tuple[float, float, float]: A tuple containing azimuth angle (in degrees), elevation angle (in degrees), and radius.\n", - " \"\"\"\n", - " hxy: float = np.hypot(x, y)\n", - " r: float = np.hypot(hxy, z)\n", - " el: 
float = np.arctan2(z, hxy)\n", - " az: float = np.arctan2(y, x)\n", - " return az, el, r\n", - "\n", - "\n", - "def sph2deg(az: float, el: float, r: float) -> tuple[float, float, float]:\n", - " \"\"\"\n", - " Convert spherical coordinates to degrees.\n", - "\n", - " Args:\n", - " az (float): Azimuth angle in radians.\n", - " el (float): Elevation angle in radians.\n", - " r (float): Radius.\n", - "\n", - " Returns:\n", - " tuple[float, float, float]: A tuple containing elevation angle (in degrees), azimuth angle (in degrees), and radius.\n", - " \"\"\"\n", - " theta: float = np.rad2deg(el) % 180\n", - " phi: float = np.rad2deg(az) % 360\n", - " return theta, phi, r" - ] - }, - { - "cell_type": "code", - "execution_count": 161, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(35.264389682754654, 45.0, 346.41016151377545)" - ] - }, - "execution_count": 161, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vec = np.array([200, 200, 200])\n", - "cart2sph(*vec)\n", - "sph2deg(*cart2sph(*vec))" - ] - }, - { - "cell_type": "code", - "execution_count": 123, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "180.0" - ] - }, - "execution_count": 123, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.rad2deg(np.pi)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 9bde5bd40edcb1dbff374604fd595a671ccd1d7d Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Tue, 23 Apr 2024 23:06:17 +0200 Subject: [PATCH 34/44] Fix cart2sph --- src/fastfiz_env/wrappers/action.py | 8 ++++---- src/fastfiz_env/wrappers/utils.py | 23 +++++++++++------------ src/tests/utils/test_wrappers.py | 14 +++++++------- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/fastfiz_env/wrappers/action.py b/src/fastfiz_env/wrappers/action.py index 7db9aa9..149ff1e 100644 --- a/src/fastfiz_env/wrappers/action.py +++ b/src/fastfiz_env/wrappers/action.py @@ -133,10 +133,10 @@ def action(self, action: np.ndarray[float, np.dtype[np.float32]]) -> np.ndarray[ ) case ActionSpaces.VECTOR_3D: x, y, z = action - az, el, r = cart2sph(x, y, z) - phi, theta, r = sph2deg(az, el, r) - theta = float(np.interp(theta, (0, np.rad2deg(np.pi)), (self.MIN_THETA, self.MAX_THETA))) - phi = float(np.interp(phi, (0, np.rad2deg(2 * np.pi)), (self.MIN_PHI, self.MAX_PHI))) + r, el, az = cart2sph(x, y, z) + r, theta, phi = sph2deg(r, el, az) + phi = float(np.interp(az, (0, np.rad2deg(np.pi)), (self.MIN_PHI, self.MAX_PHI))) + theta = float(np.interp(el, (-np.rad2deg(np.pi), np.rad2deg(np.pi)), (self.MIN_THETA, self.MAX_THETA))) velocity = float(np.interp(r, (0, np.sqrt(3)), (self.MIN_VELOCITY, self.MAX_VELOCITY))) case ActionSpaces.NORM_3D: theta = float(np.interp(action[0], (-1, 1), (self.MIN_THETA, self.MAX_THETA))) diff --git a/src/fastfiz_env/wrappers/utils.py b/src/fastfiz_env/wrappers/utils.py index caa277e..0bfe321 100644 --- a/src/fastfiz_env/wrappers/utils.py +++ b/src/fastfiz_env/wrappers/utils.py @@ -11,27 +11,26 @@ def cart2sph(x: float, y: float, z: float) -> tuple[float, float, float]: z (float): z-coordinate. 
Returns: - tuple[float, float, float]: A tuple containing azimuth angle (in degrees), elevation angle (in degrees), and radius. + tuple[float, float, float]: A tuple containing radius (magnitude), elevation angle (theta, in radians), and azimuth angle (phi, in radians). """ - hxy: float = np.hypot(x, y) - r: float = np.hypot(hxy, z) - el: float = np.arctan2(z, hxy) + r = np.sqrt(x**2 + y**2 + z**2) + el: float = np.arccos(z / r) az: float = np.arctan2(y, x) - return az, el, r + return r, el, az -def sph2deg(az: float, el: float, r: float) -> tuple[float, float, float]: +def sph2deg(r: float, el: float, az: float) -> tuple[float, float, float]: """ Convert spherical coordinates to degrees. Args: - az (float): Azimuth angle in radians. - el (float): Elevation angle in radians. r (float): Radius. + el (float): Elevation angle in radians. + az (float): Azimuth angle in radians. Returns: - tuple[float, float, float]: A tuple containing azimuth angle (phi, in degrees), elevation angle (theta, in degrees), and radius. + tuple[float, float, float]: A tuple containing radius (magnitude), elevation angle (theta, in degrees), and azimuth angle (phi, in degrees). """ - phi: float = np.rad2deg(az % (2 * np.pi)) - theta: float = np.rad2deg(el % np.pi) - return phi, theta, r + theta: float = np.rad2deg(el) + phi: float = np.rad2deg(az) + return r, theta, phi diff --git a/src/tests/utils/test_wrappers.py b/src/tests/utils/test_wrappers.py index 99062d3..8cf83a1 100644 --- a/src/tests/utils/test_wrappers.py +++ b/src/tests/utils/test_wrappers.py @@ -5,17 +5,17 @@ class TestFeatures(unittest.TestCase): def test_cart2sph(self): x, y, z = 1, 1, 1 - az, el, r = cart2sph(x, y, z) - self.assertAlmostEqual(az, 0.7853981633974483) - self.assertAlmostEqual(el, 0.6154797086703873) + r, el, az = cart2sph(x, y, z) self.assertAlmostEqual(r, 1.7320508075688772) + self.assertAlmostEqual(el, 0.9553166181245092) + self.assertAlmostEqual(az, 0.7853981633974483) def test_sph2deg(self): - az, el, r = 0.7853981633974483, 0.6154797086703873, 1.7320508075688772 - phi, theta, r = sph2deg(az, el, r) - self.assertAlmostEqual(phi, 45.0) - self.assertAlmostEqual(theta, 35.26438968275466) + r, el, az = 1.7320508075688772, 0.9553166181245092, 0.7853981633974483 + r, theta, phi = sph2deg(r, el, az) self.assertAlmostEqual(r, 1.7320508075688772) + self.assertAlmostEqual(theta, 54.735610317245346) + self.assertAlmostEqual(phi, 45.00) if __name__ == "__main__": From 1baf5eba91b4433f978407aa7c60bb45963fe5f1 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Tue, 23 Apr 2024 23:06:30 +0200 Subject: [PATCH 35/44] Add plot script --- src/plot.py | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 src/plot.py diff --git a/src/plot.py b/src/plot.py new file mode 100644 index 0000000..8ba20ed --- /dev/null +++ b/src/plot.py @@ -0,0 +1,106 @@ +import numpy as np +from tensorboard.backend.event_processing.event_accumulator import EventAccumulator +import matplotlib.pyplot as plt +import argparse + + +def smooth(scalars: list[float] | np.ndarray, weight: float) -> list[float]: + """ + EMA implementation according to + https://github.com/tensorflow/tensorboard/blob/34877f15153e1a2087316b9952c931807a122aa7/tensorboard/components/vz_line_chart2/line-chart.ts#L699 + """ + last = 0 + smoothed = [] + num_acc = 0 + for next_val in scalars: + last = last * weight + (1 - weight) * next_val + num_acc += 1 + # de-bias + debias_weight = 1 + if weight != 1: + debias_weight = 1 - (weight**num_acc) + 
smoothed_val = last / debias_weight + smoothed.append(smoothed_val) + + return smoothed + + +def plot_tensorboard_logs(log_dirs, tags_to_plot, smooth_weight=0.5): + """ + Plot TensorBoard logs for specified tags with optional smoothing. + + Args: + - log_dir (str): Path to the directory containing TensorBoard logs. + - tags_to_plot (list): List of tags to plot. + - smooth_weight (int, optional): Window size for moving average smoothing. Default is 0.5. + + Returns: + - None + """ + + # Determine colors for lines + colors = plt.cm.Set2(np.linspace(0, 1, len(tags_to_plot))) # type: ignore + line_width = 0.6 + + for log_dir in log_dirs: + # Load TensorBoard logs + event_acc = EventAccumulator(log_dir) + event_acc.Reload() + + # Get all scalar events + scalar_tags = event_acc.Tags()["scalars"] + + # Load TensorBoard logs + event_acc = EventAccumulator(log_dir) + event_acc.Reload() + + # Plot specified tags + for i, tag in enumerate(tags_to_plot): + if tag in scalar_tags: + events = event_acc.Scalars(tag) + steps = np.array([event.step for event in events]) + values = np.array([event.value for event in events]) + + if smooth_weight > 0: + # Apply moving average smoothing + smoothed_values = smooth(values, smooth_weight) + + # Plot smoothed data with custom color + plt.plot( + steps, + smoothed_values, + label=tag + f" ({smooth_weight} smoothing)", + color=colors[i], + linewidth=line_width, + ) + + # Plot original data with lower opacity using the same color + plt.plot(steps, values, alpha=0.3, color=colors[i], label=tag, linewidth=line_width) + else: + plt.plot(steps, values, color=colors[i], label=tag, linewidth=line_width) + else: + print(f"Tag '{tag}' not found in TensorBoard logs.") + + plt.grid(True, alpha=0.1) + plt.xlabel("Step") + plt.ylabel("Value") + plt.legend() + plot_name = "plot-" + "-".join(tags_to_plot).replace("/", "_") + ".pdf" + plt.savefig(plot_name) + print(f"Plot saved as '{plot_name}'") + plt.show() + + +def main(): + parser = argparse.ArgumentParser(description="Plot TensorBoard logs.") + parser.add_argument("log_dirs", nargs="+", help="Path(s) to the directory containing TensorBoard logs") + parser.add_argument("-t", "--tags", nargs="+", help="Scalar tags to plot", required=True) + parser.add_argument("-s", "--smoothing", type=float, default=0, help="Window size for moving average smoothing") + + args = parser.parse_args() + + plot_tensorboard_logs(args.log_dirs, args.tags, smooth_weight=args.smoothing) + + +if __name__ == "__main__": + main() From da3649157427d1ef7450a50cc2495d73438fe776 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Wed, 24 Apr 2024 12:19:13 +0200 Subject: [PATCH 36/44] Remove velocity from reward --- src/fastfiz_env/reward_functions/default_reward.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/fastfiz_env/reward_functions/default_reward.py b/src/fastfiz_env/reward_functions/default_reward.py index fc0ce87..b439514 100644 --- a/src/fastfiz_env/reward_functions/default_reward.py +++ b/src/fastfiz_env/reward_functions/default_reward.py @@ -8,7 +8,7 @@ NegativeConstantWeightMaxSteps, ConstantWeight, NegativeConstantWeight, - ExponentialVelocityReward, + # ExponentialVelocityReward, BallsNotMovedReward, ) @@ -19,7 +19,6 @@ ConstantReward(NegativeConstantWeightMaxSteps), BallsNotMovedReward(NegativeConstantWeightMaxSteps), StepPocketedReward(ConstantWeightBalls), - ExponentialVelocityReward(NegativeConstantWeight), ] DefaultReward = CombinedReward(reward_functions=rewards, short_circuit=True) @@ -32,7 +31,6 @@ - 
ConstantReward: -1 / max_episode_steps
 - BallsNotMovedReward: -1 / max_episode_steps
 - StepPocketedReward: 1 / (num_balls - 1)
-- ExponentialVelocityReward: -1

 Returns:
     CombinedReward: The default reward function.

From 26b4cf21b602f44299b7bcf0ec2c3f842571ed80 Mon Sep 17 00:00:00 2001
From: Mads Risager
Date: Wed, 24 Apr 2024 12:19:28 +0200
Subject: [PATCH 37/44] Fix calc and conversion

---
 src/fastfiz_env/wrappers/action.py |  6 +++---
 src/fastfiz_env/wrappers/utils.py  | 11 ++++++-----
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/fastfiz_env/wrappers/action.py b/src/fastfiz_env/wrappers/action.py
index 149ff1e..6cfb4cc 100644
--- a/src/fastfiz_env/wrappers/action.py
+++ b/src/fastfiz_env/wrappers/action.py
@@ -134,9 +134,9 @@ def action(self, action: np.ndarray[float, np.dtype[np.float32]]) -> np.ndarray[
             case ActionSpaces.VECTOR_3D:
                 x, y, z = action
                 r, el, az = cart2sph(x, y, z)
-                r, theta, phi = sph2deg(r, el, az)
-                phi = float(np.interp(az, (0, np.rad2deg(np.pi)), (self.MIN_PHI, self.MAX_PHI)))
-                theta = float(np.interp(el, (-np.rad2deg(np.pi), np.rad2deg(np.pi)), (self.MIN_THETA, self.MAX_THETA)))
+                r, el, az = sph2deg(r, el, az)
+                phi = float(np.interp(az, (0, 360), (self.MIN_PHI, self.MAX_PHI)))
+                theta = float(np.interp(el, (0, 180), (self.MIN_THETA, self.MAX_THETA)))
                 velocity = float(np.interp(r, (0, np.sqrt(3)), (self.MIN_VELOCITY, self.MAX_VELOCITY)))
             case ActionSpaces.NORM_3D:
                 theta = float(np.interp(action[0], (-1, 1), (self.MIN_THETA, self.MAX_THETA)))
diff --git a/src/fastfiz_env/wrappers/utils.py b/src/fastfiz_env/wrappers/utils.py
index 0bfe321..0080d79 100644
--- a/src/fastfiz_env/wrappers/utils.py
+++ b/src/fastfiz_env/wrappers/utils.py
@@ -14,8 +14,9 @@ def cart2sph(x: float, y: float, z: float) -> tuple[float, float, float]:
         tuple[float, float, float]: A tuple containing radius (magnitude), elevation angle (theta, in radians), and azimuth angle (phi, in radians).
     """
     r = np.sqrt(x**2 + y**2 + z**2)
-    el: float = np.arccos(z / r)
-    az: float = np.arctan2(y, x)
+    el = np.arccos(z / r)
+    az = (np.arctan2(y, x) + 2 * np.pi) % (2 * np.pi)  # Using arctan2 to get correct quadrant
+
     return r, el, az
 
 
@@ -31,6 +32,6 @@ def sph2deg(r: float, el: float, az: float) -> tuple[float, float, float]:
     Returns:
         tuple[float, float, float]: A tuple containing radius (magnitude), elevation angle (theta, in degrees), and azimuth angle (phi, in degrees).
     """
-    theta: float = np.rad2deg(el)
-    phi: float = np.rad2deg(az)
-    return r, theta, phi
+    el: float = np.rad2deg(el)
+    az: float = np.rad2deg(az)
+    return r, el, az

From ee54e636b563064ae41970c13376925248855fd4 Mon Sep 17 00:00:00 2001
From: Mads Risager
Date: Thu, 25 Apr 2024 11:49:34 +0200
Subject: [PATCH 38/44] Add action_space_id option

---
 src/fastfiz_env/make.py | 8 +++++---
 src/train.py            | 7 ++++++-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/fastfiz_env/make.py b/src/fastfiz_env/make.py
index 7fa8b2a..2f2f30d 100644
--- a/src/fastfiz_env/make.py
+++ b/src/fastfiz_env/make.py
@@ -43,6 +43,7 @@ def make_wrapped_env(
     num_balls: int,
     max_episode_steps: int,
     reward_function: RewardFunction,
+    action_space_id: ActionSpaces,
     **kwargs,
 ):
     """
@@ -56,7 +57,7 @@ def make_wrapped_env(
         disable_env_checker=False,
         **kwargs,
     )
-    env = FastFizActionWrapper(env, action_space_id=ActionSpaces.VECTOR_3D)
+    env = FastFizActionWrapper(env, action_space_id=action_space_id)
     return env
 
 
@@ -64,7 +65,8 @@ def make_callable_wrapped_env(
     env_id: str,
     num_balls: int,
     max_episode_steps: int,
-    reward_function: RewardFunction,
+    reward_function: RewardFunction = DefaultReward,
+    action_space_id: ActionSpaces = ActionSpaces.VECTOR_3D,
     **kwargs,
 ):
     """
@@ -73,6 +75,6 @@ def make_callable_wrapped_env(
     """
 
     def _init() -> gym.Env:
-        return make_wrapped_env(env_id, num_balls, max_episode_steps, reward_function, **kwargs)
+        return make_wrapped_env(env_id, num_balls, max_episode_steps, reward_function, action_space_id, **kwargs)
 
     return _init
diff --git a/src/train.py b/src/train.py
index 7157dcd..121d5b5 100644
--- a/src/train.py
+++ b/src/train.py
@@ -13,6 +13,7 @@
     CallbackList,
 )
 from stable_baselines3.common.env_util import make_vec_env
+from fastfiz_env.wrappers.action import ActionSpaces
 
 from hyperparams import params_to_kwargs
 
@@ -40,11 +41,12 @@ def train(
     logs_path: str = "logs/",
     models_path: str = "models/",
     reward_function: RewardFunction = DefaultReward,
+    action_space_id: ActionSpaces = ActionSpaces.VECTOR_3D,
     callbacks=None,
     params: Optional[dict] = None,
 ) -> None:
     env = make_vec_env(
-        make_callable_wrapped_env(env_id, num_balls, max_episode_steps, reward_function),
+        make_callable_wrapped_env(env_id, num_balls, max_episode_steps, reward_function, action_space_id=action_space_id),
         n_envs=n_envs,
     )
 
@@ -122,6 +124,8 @@
         help="Path to hyperparameters file (file must have key 'params' with dict of hyperparameters",
     )
 
+    parser.add_argument("-a", "--action_id", type=ActionSpaces, choices=list(ActionSpaces), default=ActionSpaces.VECTOR_3D)
+
     args = parser.parse_args()
 
     reward_function = DefaultReward if args.reward == "DefaultReward" else WinningReward
@@ -164,5 +168,6 @@
         logs_path=logs_path,
         models_path=models_path,
         reward_function=reward_function,
+        action_space_id=args.action_id,
         params=params,
     )

From c17989b05bd3fa996e51bf9b66ac8bc5856f2ad5 Mon Sep 17 00:00:00 2001
From: Mads Risager
Date: Thu, 25 Apr 2024 11:55:40 +0200
Subject: [PATCH 39/44] Fix model name

---
 src/train.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/train.py b/src/train.py
index 121d5b5..aadc06f 100644
--- a/src/train.py
+++ b/src/train.py
@@ -27,8 +27,8 @@ def get_latest_run_id(log_path: str, name: str) -> int:
     return id
 
 
-def get_model_name(env_name: str, balls: int, algo: str = "PPO") -> str:
-    return f"{env_name.split('FastFiz-v0')[0]}-{balls}_balls-{algo}".lower()
+def get_model_name(env_name: str, balls: int, algo: str = "PPO", action_space_id=ActionSpaces.VECTOR_3D) -> str:
+    return f"{env_name.split('FastFiz-v0')[0]}-{balls}_balls-{action_space_id.name}-{algo}".lower()
 
 
 def train(
@@ -52,7 +52,7 @@ def train(
     hyperparams = params_to_kwargs(**params) if params else {}
     print(hyperparams)
 
-    model_name = get_model_name(env_id, num_balls)
+    model_name = get_model_name(env_id, num_balls, action_space_id)
 
     if model_dir is None:
         model = PPO("MlpPolicy", env, verbose=1, tensorboard_log=logs_path, **hyperparams)
@@ -156,7 +156,8 @@
         model_path: {model_path}\n\
         logs_path: {logs_path}\n\
         models_path: {models_path}\n\
-        reward_function: {reward}\n"
+        reward_function: {reward}\n\
+        action_space_id: {args.action_id}\n"
     )
 
     train(

From 20b71da1c4eb08d749de3774e778ae5e390f892a Mon Sep 17 00:00:00 2001
From: Mads Risager
Date: Tue, 30 Apr 2024 14:23:16 +0200
Subject: [PATCH 40/44] Add script to log random policy evaluation metrics

---
 src/random_policy.py | 63 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 src/random_policy.py

diff --git a/src/random_policy.py b/src/random_policy.py
new file mode 100644
index 0000000..6c95116
--- /dev/null
+++ b/src/random_policy.py
@@ -0,0 +1,63 @@
+from torch.utils.tensorboard import SummaryWriter
+
+from stable_baselines3.common.env_util import make_vec_env
+from fastfiz_env.reward_functions.default_reward import DefaultReward
+from fastfiz_env.wrappers.action import ActionSpaces, FastFizActionWrapper
+import fastfiz_env
+
+
+# Create and wrap the environment
+# env = DummyVecEnv([lambda: gym.make("CartPole-v1")])
+env = fastfiz_env.make_callable_wrapped_env(
+    "PocketsFastFiz-v0",
+    max_episode_steps=20,
+    reward_function=DefaultReward,
+    action_space_id=ActionSpaces.VECTOR_3D,
+    num_balls=2,
+)
+
+env = make_vec_env(env, n_envs=1)
+
+total_timesteps = 8_000_000
+eval_freq = 50_000
+eval_episodes = 100
+total_runs = (total_timesteps // eval_freq) * eval_episodes
+# total_runs = 10000
+
+# Initialize the SummaryWriter
+writer = SummaryWriter(log_dir="logs/random_policy", comment="-random-policy")
+
+total_success = 0
+total_len = 0
+total_reward = 0
+for episode in range(total_runs):
+    obs = env.reset()
+
+    # Your policy rollout code here
+    done = False
+    while not done:
+        action = env.action_space.sample()  # Random policy
+        obs, reward, done, info = env.step([action])
+        total_len += 1
+        total_reward += reward
+
+    total_success += int(info[0]["is_success"])
+
+    # Log episode reward
+
+
+success_mean = total_success / total_runs
+episode_mean = total_len / total_runs
+rew_mean = total_reward / total_runs
+print(f"Success rate: {success_mean}")
+print(f"Mean episode length: {episode_mean}")
+print(f"Mean episode reward: {rew_mean}")
+for episode in range(1, total_timesteps + eval_freq - 1):
+    if episode % eval_freq == 0:
+        writer.add_scalar("eval/success_rate", success_mean, episode)
+        writer.add_scalar("eval/mean_reward", rew_mean, episode)
+        writer.add_scalar("eval/mean_ep_length", episode_mean, episode)
+
+# Close the environment and SummaryWriter
+env.close()
+writer.close()

From 8dd0213882ab069883aeaa69112a65b3651e0890 Mon Sep 17 00:00:00 2001
From: Mads Risager
Date: Tue, 30 Apr 2024 14:25:07 +0200
Subject: [PATCH 41/44] Reset with seed

---
 src/fastfiz_env/envs/pockets_fastfiz.py | 2 +-
 src/fastfiz_env/envs/simple_fastfiz.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/fastfiz_env/envs/pockets_fastfiz.py b/src/fastfiz_env/envs/pockets_fastfiz.py
index ae7c9bf..3cc789a 100644
--- a/src/fastfiz_env/envs/pockets_fastfiz.py
+++ b/src/fastfiz_env/envs/pockets_fastfiz.py
@@ -60,7 +60,7 @@ def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -
         if self.max_episode_steps is None or self.elapsed_steps is None:
             self._get_time_limit_attrs()
 
-        self.table_state = create_random_table_state(self.num_balls)
+        self.table_state = create_random_table_state(self.num_balls, seed=seed)
         self.reward.reset(self.table_state)
 
         self._prev_pocketed = 0
diff --git a/src/fastfiz_env/envs/simple_fastfiz.py b/src/fastfiz_env/envs/simple_fastfiz.py
index 3de9ddf..bc628c4 100644
--- a/src/fastfiz_env/envs/simple_fastfiz.py
+++ b/src/fastfiz_env/envs/simple_fastfiz.py
@@ -58,7 +58,7 @@ def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -
         if self.max_episode_steps is None or self.elapsed_steps is None:
             self._get_time_limit_attrs()
 
-        self.table_state = create_random_table_state(self.num_balls)
+        self.table_state = create_random_table_state(self.num_balls, seed=seed)
         self.reward.reset(self.table_state)
 
         self._prev_pocketed = 0

From f9fd52fc705ce8c3439d2c1b93e3f45078f3b4a6 Mon Sep 17 00:00:00 2001
From: Mads Risager
Date: Tue, 30 Apr 2024 14:25:40 +0200
Subject: [PATCH 42/44] Remove commit versions

---
 pyproject.toml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 63e4dba..1413b98 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,7 @@ readme = "README.md"
 requires-python = ">=3.10"
 dynamic = ["version"]
 dependencies = [
-    "fastfiz @ git+https://github.com/P6-Pool/fastfiz.git@2af8aed22bec1faeb5ac92b98b0751a0023f3fb7",
+    "fastfiz @ git+https://github.com/P6-Pool/fastfiz.git",
     "gymnasium",
     "numpy",
     "vectormath",
 ]
 
 [project.optional-dependencies]
 dev = [
-    "fastfiz_renderer @ git+https://github.com/P6-Pool/fastfiz-renderer.git@4ffb95e8683b30975b1d8f5dd483d56599e1e062",
+    "fastfiz_renderer @ git+https://github.com/P6-Pool/fastfiz-renderer.git",
     "stable-baselines3",
     "tqdm",
     "rich",
@@ -29,7 +29,7 @@ dev = [
 test = ["pytest", "mypy", "ruff"]
 all = [
     # dev
-    "fastfiz_renderer @ git+https://github.com/P6-Pool/fastfiz-renderer.git@4ffb95e8683b30975b1d8f5dd483d56599e1e062",
+    "fastfiz_renderer @ git+https://github.com/P6-Pool/fastfiz-renderer.git",
     "stable-baselines3",
     "tqdm",
     "rich",

From d29ed7833140842002dddbd2368645f2c1554468 Mon Sep 17 00:00:00 2001
From: Mads Risager
Date: Tue, 30 Apr 2024 14:28:28 +0200
Subject: [PATCH 43/44] Setup Latex plots

---
 src/plot.py | 84 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 69 insertions(+), 15 deletions(-)

diff --git a/src/plot.py b/src/plot.py
index 8ba20ed..b9bb25b 100644
--- a/src/plot.py
+++ b/src/plot.py
@@ -3,6 +3,15 @@
 import matplotlib.pyplot as plt
 import argparse
 
+# Setup for Latex rendering
+plt.rcParams.update(
+    {
+        "text.usetex": True,
+        "font.family": "serif",  # Use serif font
+        "font.serif": ["Computer Modern Roman"],  # Use Computer Modern Roman font
+    }
+)
+
 
 def smooth(scalars: list[float] | np.ndarray, weight: float) -> list[float]:
     """
@@ -25,24 +34,28 @@ def smooth(scalars: list[float] | np.ndarray, weight: float) -> list[float]:
     return smoothed
 
 
-def plot_tensorboard_logs(log_dirs, tags_to_plot, smooth_weight=0.5):
+def plot_tensorboard_logs(log_dirs, tags_to_plot, smooth_weight=0.5, show=True) -> None:
     """
     Plot TensorBoard logs for specified tags with optional smoothing.
 
     Args:
-    - log_dir (str): Path to the directory containing TensorBoard logs.
+    - log_dirs (str): Path to the directory containing TensorBoard logs.
     - tags_to_plot (list): List of tags to plot.
     - smooth_weight (int, optional): Window size for moving average smoothing. Default is 0.5.
+    - show (bool, optional): Whether to display the plot. Default is True.
 
     Returns:
     - None
     """
     # Determine colors for lines
-    colors = plt.cm.Set2(np.linspace(0, 1, len(tags_to_plot)))  # type: ignore
-    line_width = 0.6
+    # colors = plt.cm.tab10(np.linspace(0, 1, len(tags_to_plot)))  # type: ignore
+    cmap = plt.get_cmap("tab10")
+    colors = [cmap(int(i * 3.25 % 10)) for i in np.linspace(0, 1, len(tags_to_plot) * len(log_dirs))]
+
+    line_width = 0.5
 
-    for log_dir in log_dirs:
+    for j, log_dir in enumerate(log_dirs):
         # Load TensorBoard logs
         event_acc = EventAccumulator(log_dir)
         event_acc.Reload()
@@ -50,17 +63,33 @@ def plot_tensorboard_logs(log_dirs, tags_to_plot, smooth_weight=0.5):
         # Get all scalar events
         scalar_tags = event_acc.Tags()["scalars"]
 
+        graph_name = log_dir.split("/")[-1]
+        plot_name = ", ".join(tags_to_plot)
+
         # Load TensorBoard logs
         event_acc = EventAccumulator(log_dir)
         event_acc.Reload()
 
         # Plot specified tags
         for i, tag in enumerate(tags_to_plot):
+            color = colors[j * len(tags_to_plot) + i]
             if tag in scalar_tags:
                 events = event_acc.Scalars(tag)
                 steps = np.array([event.step for event in events])
                 values = np.array([event.value for event in events])
 
+                # Custom action space labels
+                def action_space(n: int):
+                    return r"$\mathcal{A}_" + f"{n}" + r"$"
+
+                label = graph_name.replace("_", ", ").replace("-", " ")
+                if "cart" in graph_name.lower():
+                    label = label.split(", ")[0] + f", {action_space(2)}"
+                elif "reg" in graph_name.lower():
+                    label = label.split(", ")[0] + f", {action_space(1)}"
+                elif "random" in graph_name.lower():
+                    label = label.split(", ")[0] + ", random policy"
+
                 if smooth_weight > 0:
                     # Apply moving average smoothing
                     smoothed_values = smooth(values, smooth_weight)
@@ -69,37 +98,62 @@ def plot_tensorboard_logs(log_dirs, tags_to_plot, smooth_weight=0.5):
                     plt.plot(
                         steps,
                         smoothed_values,
-                        label=tag + f" ({smooth_weight} smoothing)",
-                        color=colors[i],
+                        label=label + f" ({smooth_weight} EMA)",
+                        color=color,
                         linewidth=line_width,
                     )
                     # Plot original data with lower opacity using the same color
-                    plt.plot(steps, values, alpha=0.3, color=colors[i], label=tag, linewidth=line_width)
+                    plt.plot(steps, values, alpha=0.25, color=color, label=None, linewidth=line_width)
                 else:
-                    plt.plot(steps, values, color=colors[i], label=tag, linewidth=line_width)
+                    plt.plot(steps, values, color=color, label=label, linewidth=line_width)
             else:
                 print(f"Tag '{tag}' not found in TensorBoard logs.")
 
     plt.grid(True, alpha=0.1)
     plt.xlabel("Step")
-    plt.ylabel("Value")
-    plt.legend()
+    plt.ylabel(
+        tags_to_plot[0]
+        .split("/")[-1]
+        .replace("_", " ")
+        .replace("rew", "reward")
+        .replace("ep", "episode")
+        .replace("len", "length")
+        .title()
+    )
+    plt.legend(fontsize="small")
+    plot_name = "plot-" + "-".join(tags_to_plot).replace("/", "_") + ".pdf"
     plt.savefig(plot_name)
     print(f"Plot saved as '{plot_name}'")
-    plt.show()
+    if show:
+        plt.show()
+    plt.clf()
 
 
 def main():
     parser = argparse.ArgumentParser(description="Plot TensorBoard logs.")
     parser.add_argument("log_dirs", nargs="+", help="Path(s) to the directory containing TensorBoard logs")
-    parser.add_argument("-t", "--tags", nargs="+", help="Scalar tags to plot", required=True)
+    parser.add_argument("-t", "--tags", nargs="+", help="Scalar tags to plot")
     parser.add_argument("-s", "--smoothing", type=float, default=0, help="Window size for moving average smoothing")
-
+    parser.add_argument("-a", "--all", action="store_true", help="Plot all tags in the log directory")
     args = parser.parse_args()
 
-    plot_tensorboard_logs(args.log_dirs, args.tags, smooth_weight=args.smoothing)
+    if args.all:
+        tags_to_plot = [
+            "eval/success_rate",
+            "eval/mean_reward",
+            "eval/mean_ep_length",
+            "rollout/ep_rew_mean",
+            "rollout/ep_len_mean",
+        ]
+        for tag in tags_to_plot:
+            smoothing = 0
+            if tag.startswith("rollout"):
+                smoothing = args.smoothing or 0.5
+            plot_tensorboard_logs(args.log_dirs, [tag], smooth_weight=smoothing, show=False)
+    else:
+        plot_tensorboard_logs(args.log_dirs, args.tags, smooth_weight=args.smoothing)
 
 
 if __name__ == "__main__":

From c012f8b975acb5bfacc0483859b70e1acde34a8a Mon Sep 17 00:00:00 2001
From: Mads Risager
Date: Tue, 30 Apr 2024 14:43:24 +0200
Subject: [PATCH 44/44] Fix already defined

---
 src/fastfiz_env/wrappers/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/fastfiz_env/wrappers/utils.py b/src/fastfiz_env/wrappers/utils.py
index 0080d79..f518cf4 100644
--- a/src/fastfiz_env/wrappers/utils.py
+++ b/src/fastfiz_env/wrappers/utils.py
@@ -32,6 +32,6 @@ def sph2deg(r: float, el: float, az: float) -> tuple[float, float, float]:
     Returns:
         tuple[float, float, float]: A tuple containing radius (magnitude), elevation angle (theta, in degrees), and azimuth angle (phi, in degrees).
     """
-    el: float = np.rad2deg(el)
-    az: float = np.rad2deg(az)
-    return r, el, az
+    el_deg: float = np.rad2deg(el)
+    az_deg: float = np.rad2deg(az)
+    return r, el_deg, az_deg