From b12ea887dd9240082910153023bfe7825f1e3783 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 13:35:42 +0200 Subject: [PATCH 01/44] Save best trial kwargs for PPO model --- src/optimize.py | 122 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 92 insertions(+), 30 deletions(-) diff --git a/src/optimize.py b/src/optimize.py index 20a5f66..4814509 100644 --- a/src/optimize.py +++ b/src/optimize.py @@ -7,13 +7,14 @@ import time import torch import torch.nn as nn -from fastfiz_env.make import make_wrapped_env, make_wrapped_vec_env +from fastfiz_env.make import make_callable_wrapped_env from fastfiz_env.reward_functions import DefaultReward, WinningReward, RewardFunction from optuna.pruners import MedianPruner from optuna.samplers import TPESampler from stable_baselines3 import PPO from stable_baselines3.common.callbacks import EvalCallback from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.env_util import make_vec_env from typing import Any, Dict @@ -43,16 +44,20 @@ def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]: "max_grad_norm", [0.3, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 5] ) vf_coef = trial.suggest_float("vf_coef", 0, 1) - net_arch_type = trial.suggest_categorical("net_arch", ["tiny", "small", "medium"]) + net_arch_type = trial.suggest_categorical( + "net_arch_type", ["tiny", "small", "medium"] + ) # Uncomment for gSDE (continuous actions) # log_std_init = trial.suggest_float("log_std_init", -4, 1) # Uncomment for gSDE (continuous action) # sde_sample_freq = trial.suggest_categorical("sde_sample_freq", [-1, 8, 16, 32, 64, 128, 256]) # Orthogonal initialization - ortho_init = False + ortho_init = trial.suggest_categorical("ortho_init", [False]) # ortho_init = trial.suggest_categorical('ortho_init', [False, True]) # activation_fn = trial.suggest_categorical('activation_fn', ['tanh', 'relu', 'elu', 'leaky_relu']) - activation_fn_name = trial.suggest_categorical("activation_fn", ["tanh", "relu"]) + activation_fn_name = trial.suggest_categorical( + "activation_fn_name", ["tanh", "relu"] + ) # lr_schedule = "constant" # Uncomment to enable learning rate schedule # lr_schedule = trial.suggest_categorical('lr_schedule', ['linear', 'constant']) @@ -65,6 +70,40 @@ def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]: # Independent networks usually work best # when not working with images + return params_to_kwargs( + batch_size=batch_size, + n_steps=n_steps, + gamma=gamma, + learning_rate=learning_rate, + ent_coef=ent_coef, + clip_range=clip_range, + n_epochs=n_epochs, + gae_lambda=gae_lambda, + max_grad_norm=max_grad_norm, + vf_coef=vf_coef, + net_arch_type=net_arch_type, + ortho_init=ortho_init, + activation_fn_name=activation_fn_name, + ) + + +def params_to_kwargs( + *, + batch_size, + n_steps, + gamma, + learning_rate, + ent_coef, + clip_range, + n_epochs, + gae_lambda, + max_grad_norm, + vf_coef, + net_arch_type, + ortho_init, + activation_fn_name, + **kwargs, +): net_arch = { "tiny": dict(pi=[64], vf=[64]), "small": dict(pi=[64, 64], vf=[64, 64]), @@ -90,12 +129,12 @@ def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]: "max_grad_norm": max_grad_norm, "vf_coef": vf_coef, # "sde_sample_freq": sde_sample_freq, - "policy_kwargs": dict( + "policy_kwargs": { # log_std_init=log_std_init, - net_arch=net_arch, - activation_fn=activation_fn, - ortho_init=ortho_init, - ), + "net_arch": net_arch, + "activation_fn": activation_fn, + "ortho_init": ortho_init, + }, } @@ -145,19 +184,23 @@ def objective( eval_freq: int, 
n_timesteps: int, start_time: str, + no_logs: bool, ) -> float: kwargs = sample_ppo_params(trial) N_ENVS = 4 - env = make_wrapped_vec_env( - env_id, num_balls, max_episode_steps, N_ENVS, reward_function + env = make_vec_env( + make_callable_wrapped_env( + env_id, num_balls, max_episode_steps, reward_function + ), + n_envs=N_ENVS, ) model = PPO( "MlpPolicy", env, **kwargs, - tensorboard_log=f"logs/trials", + tensorboard_log="logs/trials" if not no_logs else None, ) # Create the callback that will periodically evaluate and report the performance. @@ -198,13 +241,21 @@ def objective( def save_trial(trial: optuna.trial.FrozenTrial, path: str) -> None: + os.makedirs(os.path.dirname(path), exist_ok=True) + trial_dict = { + "value": trial.value, + "params": trial.params, + "user_attrs": trial.user_attrs, + "kwargs": params_to_kwargs(**trial.params), + } + + trial_dict["kwargs"]["policy_kwargs"]["activation_fn"] = trial_dict["params"][ + "activation_fn_name" + ] + with open(path, "w") as fp: json.dump( - { - "value": trial.value, - "params": trial.params, - "user_attrs": trial.user_attrs, - }, + trial_dict, fp, indent=4, ) @@ -242,8 +293,17 @@ def save_trial(trial: optuna.trial.FrozenTrial, path: str) -> None: help="Environment ID", required=True, ) - parser.add_argument("--n_jobs", type=int, default=1) - parser.add_argument("--max_episode_steps", type=int, default=20) + parser.add_argument("--n_jobs", type=int, default=1, help="Number of parallel jobs") + parser.add_argument( + "--max_episode_steps", + type=int, + default=20, + help="Max episode steps for the environment", + ) + parser.add_argument( + "--no-logs", action="store_true", help="Disable Tensorboard logging" + ) + args = parser.parse_args() # Set pytorch num threads to 1 for faster training. @@ -260,17 +320,19 @@ def save_trial(trial: optuna.trial.FrozenTrial, path: str) -> None: reward_function = DefaultReward if args.reward == "DefaultReward" else WinningReward - obj_fn = lambda trial: objective( - trial, - args.env_id, - args.num_balls, - args.max_episode_steps, - reward_function, - args.n_eval_episodes, - args.eval_freq, - args.n_timesteps, - start_time, - ) + def obj_fn(trial): + return objective( + trial, + args.env_id, + args.num_balls, + args.max_episode_steps, + reward_function, + args.n_eval_episodes, + args.eval_freq, + args.n_timesteps, + start_time, + args.no_logs, + ) try: study.optimize(obj_fn, n_trials=args.n_trials, timeout=3600, n_jobs=args.n_jobs) From d8b72d710253f44e06569213c7a4e3b88c75372b Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 13:37:40 +0200 Subject: [PATCH 02/44] Remove sb3 dependency from fastfiz-env package --- src/fastfiz_env/make.py | 21 +++++++++++---------- src/train.py | 10 +++++++--- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/fastfiz_env/make.py b/src/fastfiz_env/make.py index 299013f..9b13b4e 100644 --- a/src/fastfiz_env/make.py +++ b/src/fastfiz_env/make.py @@ -3,7 +3,6 @@ from fastfiz_env.wrappers.action import ActionSpaces, FastFizActionWrapper from .reward_functions import RewardFunction, DefaultReward -from stable_baselines3.common.env_util import make_vec_env def make( @@ -13,7 +12,7 @@ def make( num_balls: int = 16, max_episode_steps: int = 100, disable_env_checker: bool = True, - **kwargs + **kwargs, ) -> gym.Env: """ Create an instance of the specified environment. 
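[Editor's usage sketch, not part of the committed diff: patch 02 replaces make_wrapped_vec_env with a make_callable_wrapped_env factory so that stable-baselines3 becomes a caller-side dependency. A minimal sketch of the intended caller-side pattern follows; the environment id "SimpleFastFiz-v0" is a placeholder assumption, while the factory signature and reward import follow the modules shown in this patch.]

    from stable_baselines3.common.env_util import make_vec_env
    from fastfiz_env.make import make_callable_wrapped_env
    from fastfiz_env.reward_functions import DefaultReward

    # Placeholder env id; substitute whichever id the project actually registers.
    env_fn = make_callable_wrapped_env(
        "SimpleFastFiz-v0",
        num_balls=2,
        max_episode_steps=20,
        reward_function=DefaultReward,
    )
    # make_vec_env accepts a callable that builds a single env, so the
    # fastfiz_env package itself no longer needs to import stable-baselines3.
    vec_env = make_vec_env(env_fn, n_envs=4)
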
@@ -42,6 +41,9 @@ def make( def make_wrapped_env( env_id: str, num_balls: int, max_episode_steps: int, reward_function: RewardFunction ): + """ + Create an instance of the specified environment with the FastFizActionWrapper. + """ env = make( env_id, reward_function=reward_function, @@ -53,19 +55,18 @@ def make_wrapped_env( return env -def make_wrapped_vec_env( +def make_callable_wrapped_env( env_id: str, num_balls: int, max_episode_steps: int, - n_envs: int, reward_function: RewardFunction, ): + """ + Create a callable function that returns an instance of the specified environment with the FastFizActionWrapper. + This is useful for creating environments in parallel or with stable-baselines `make_vec_env` function. + """ - def make_env(): + def _init() -> gym.Env: return make_wrapped_env(env_id, num_balls, max_episode_steps, reward_function) - env = make_vec_env( - make_env, - n_envs=n_envs, - ) - return env + return _init diff --git a/src/train.py b/src/train.py index 09de250..12ace36 100644 --- a/src/train.py +++ b/src/train.py @@ -1,7 +1,7 @@ import argparse import glob import os -from fastfiz_env.make import make_wrapped_vec_env +from fastfiz_env.make import make_callable_wrapped_env from fastfiz_env.reward_functions import RewardFunction from typing import Optional from stable_baselines3 import PPO @@ -11,6 +11,7 @@ EvalCallback, CallbackList, ) +from stable_baselines3.common.env_util import make_vec_env def get_latest_run_id(log_path: str, name: str) -> int: @@ -39,8 +40,11 @@ def train( reward_function: RewardFunction = DefaultReward, callbacks=None, ) -> None: - env = make_wrapped_vec_env( - env_id, num_balls, max_episode_steps, n_envs, reward_function + env = make_vec_env( + make_callable_wrapped_env( + env_id, num_balls, max_episode_steps, reward_function + ), + n_envs=n_envs, ) model_name = get_model_name(env_id, num_balls) From 2f1600a3a9225dbe489d210a1b36160a68449f01 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 14:04:24 +0200 Subject: [PATCH 03/44] Add params arg to set hyperparams --- src/train.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/src/train.py b/src/train.py index 12ace36..4254eb6 100644 --- a/src/train.py +++ b/src/train.py @@ -1,5 +1,6 @@ import argparse import glob +import json import os from fastfiz_env.make import make_callable_wrapped_env from fastfiz_env.reward_functions import RewardFunction @@ -12,6 +13,7 @@ CallbackList, ) from stable_baselines3.common.env_util import make_vec_env +from hyperparams import params_to_kwargs def get_latest_run_id(log_path: str, name: str) -> int: @@ -39,6 +41,7 @@ def train( models_path: str = "models/", reward_function: RewardFunction = DefaultReward, callbacks=None, + params: Optional[dict] = None, ) -> None: env = make_vec_env( make_callable_wrapped_env( @@ -47,10 +50,14 @@ def train( n_envs=n_envs, ) + hyperparams = params_to_kwargs(**params) if params else {} + print(hyperparams) model_name = get_model_name(env_id, num_balls) if model_dir is None: - model = PPO("MlpPolicy", env, verbose=1, tensorboard_log=logs_path) + model = PPO( + "MlpPolicy", env, verbose=1, tensorboard_log=logs_path, **hyperparams + ) else: model = PPO.load(model_dir, env=env, verbose=1, tensorboard_log=logs_path) pretrained_name = model_dir.split("/")[-1].rsplit(".zip", 1)[0] @@ -111,6 +118,14 @@ def train( choices=["DefaultReward", "WinningReward"], default="DefaultReward", ) + + # Hyper params + parser.add_argument( + "--params", + type=str, + help="Path to 
hyperparameters file (file must have key 'params' with dict of hyperparameters", + ) + args = parser.parse_args() reward_function = DefaultReward if args.reward == "DefaultReward" else WinningReward @@ -123,6 +138,17 @@ def train( total_timesteps = args.n_time_steps logs_path = args.logs_path models_path = args.models_path + reward = args.reward + params = None + if args.params: + params_path = args.params + assert os.path.exists(logs_path), f"params path does not exist: {logs_path}" + with open(params_path, "r") as fp: + params = json.load(fp) + assert ( + "params" in params + ), "params file must have key 'params' with dict of hyperparameters" + params = params["params"] print( f"Starting training on {env_id} with following settings:\n\ @@ -132,7 +158,7 @@ def train( model_path: {model_path}\n\ logs_path: {logs_path}\n\ models_path: {models_path}\n\ - reward_function: {args.reward}\n" + reward_function: {reward}\n" ) train( @@ -144,4 +170,5 @@ def train( logs_path=logs_path, models_path=models_path, reward_function=reward_function, + params=params, ) From e38ada3b981cf445bacdce50931488442d37deb6 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 14:05:05 +0200 Subject: [PATCH 04/44] Add make_callable_wrapped_env to __all__ --- src/fastfiz_env/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/fastfiz_env/__init__.py b/src/fastfiz_env/__init__.py index 23d92a8..2be325b 100644 --- a/src/fastfiz_env/__init__.py +++ b/src/fastfiz_env/__init__.py @@ -28,14 +28,16 @@ """ -from .make import make, make_wrapped_vec_env, make_wrapped_env +__version__ = "0.0.1" + +from .make import make, make_wrapped_env, make_callable_wrapped_env from .reward_functions import DefaultReward, RewardFunction, CombinedReward from . 
import envs, utils, wrappers, reward_functions __all__ = [ "make", - "make_wrapped_vec_env", "make_wrapped_env", + "make_callable_wrapped_env", "DefaultReward", "RewardFunction", "CombinedReward", From 570867729cea09ce36374e8b59d84c9e6f124c82 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 14:05:26 +0200 Subject: [PATCH 05/44] Add params_to_kwargs() function --- src/hyperparams.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 src/hyperparams.py diff --git a/src/hyperparams.py b/src/hyperparams.py new file mode 100644 index 0000000..6d87690 --- /dev/null +++ b/src/hyperparams.py @@ -0,0 +1,52 @@ +import torch.nn as nn + + +def params_to_kwargs( + *, + batch_size, + n_steps, + gamma, + learning_rate, + ent_coef, + clip_range, + n_epochs, + gae_lambda, + max_grad_norm, + vf_coef, + net_arch_type, + ortho_init, + activation_fn_name, + **kwargs, +): + net_arch = { + "tiny": dict(pi=[64], vf=[64]), + "small": dict(pi=[64, 64], vf=[64, 64]), + "medium": dict(pi=[256, 256], vf=[256, 256]), + }[net_arch_type] + + activation_fn = { + "tanh": nn.Tanh, + "relu": nn.ReLU, + "elu": nn.ELU, + "leaky_relu": nn.LeakyReLU, + }[activation_fn_name] + + return { + "n_steps": n_steps, + "batch_size": batch_size, + "gamma": gamma, + "learning_rate": learning_rate, + "ent_coef": ent_coef, + "clip_range": clip_range, + "n_epochs": n_epochs, + "gae_lambda": gae_lambda, + "max_grad_norm": max_grad_norm, + "vf_coef": vf_coef, + # "sde_sample_freq": sde_sample_freq, + "policy_kwargs": { + # log_std_init=log_std_init, + "net_arch": net_arch, + "activation_fn": activation_fn, + "ortho_init": ortho_init, + }, + } From dd680d2211fe73a933d8e18d72e12c357ab5b5ec Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 14:06:51 +0200 Subject: [PATCH 06/44] Add params_to_kwargs() function --- src/optimize.py | 57 +------------------------------------------------ 1 file changed, 1 insertion(+), 56 deletions(-) diff --git a/src/optimize.py b/src/optimize.py index 4814509..222728b 100644 --- a/src/optimize.py +++ b/src/optimize.py @@ -16,6 +16,7 @@ from stable_baselines3.common.monitor import Monitor from stable_baselines3.common.env_util import make_vec_env from typing import Any, Dict +from hyperparams import params_to_kwargs # https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/rl_zoo3/hyperparams_opt.py @@ -87,57 +88,6 @@ def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]: ) -def params_to_kwargs( - *, - batch_size, - n_steps, - gamma, - learning_rate, - ent_coef, - clip_range, - n_epochs, - gae_lambda, - max_grad_norm, - vf_coef, - net_arch_type, - ortho_init, - activation_fn_name, - **kwargs, -): - net_arch = { - "tiny": dict(pi=[64], vf=[64]), - "small": dict(pi=[64, 64], vf=[64, 64]), - "medium": dict(pi=[256, 256], vf=[256, 256]), - }[net_arch_type] - - activation_fn = { - "tanh": nn.Tanh, - "relu": nn.ReLU, - "elu": nn.ELU, - "leaky_relu": nn.LeakyReLU, - }[activation_fn_name] - - return { - "n_steps": n_steps, - "batch_size": batch_size, - "gamma": gamma, - "learning_rate": learning_rate, - "ent_coef": ent_coef, - "clip_range": clip_range, - "n_epochs": n_epochs, - "gae_lambda": gae_lambda, - "max_grad_norm": max_grad_norm, - "vf_coef": vf_coef, - # "sde_sample_freq": sde_sample_freq, - "policy_kwargs": { - # log_std_init=log_std_init, - "net_arch": net_arch, - "activation_fn": activation_fn, - "ortho_init": ortho_init, - }, - } - - # https://github.com/optuna/optuna-examples/blob/main/rl/sb3_simple.py class 
TrialEvalCallback(EvalCallback): """Callback used for evaluating and reporting a trial.""" @@ -246,13 +196,8 @@ def save_trial(trial: optuna.trial.FrozenTrial, path: str) -> None: "value": trial.value, "params": trial.params, "user_attrs": trial.user_attrs, - "kwargs": params_to_kwargs(**trial.params), } - trial_dict["kwargs"]["policy_kwargs"]["activation_fn"] = trial_dict["params"][ - "activation_fn_name" - ] - with open(path, "w") as fp: json.dump( trial_dict, From c1658b6314930db95eb1f6a9c3690f97f34a5b67 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 14:59:47 +0200 Subject: [PATCH 07/44] Add pyproject.toml --- .github/workflows/lint.yml | 30 ++++++++++++++++++++ .github/workflows/test.yml | 33 ++++++++++++++++++++++ pyproject.toml | 57 ++++++++++++++++++++++++++++++++++++++ setup.py | 28 ++++++++++++------- 4 files changed, 138 insertions(+), 10 deletions(-) create mode 100644 .github/workflows/lint.yml create mode 100644 .github/workflows/test.yml create mode 100644 pyproject.toml diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..0405eb7 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,30 @@ +name: Lint Python package + +on: + push: + pull_request: + branches: ['main'] + +jobs: + lint: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ['3.10', '3.11'] + steps: + - uses: actions/checkout@v4 + - uses: chartboost/ruff-action@v1 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install dependencies + run: | + sudo apt-get install python3-opengl + python -m pip install --upgrade pip + pip install ".[test]" + - name: Run MyPy + run: | + mypy src/fastfiz_env diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..4c60f76 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,33 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Test Python package + +on: + push: + pull_request: + branches: ['main'] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ['3.10', '3.11'] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install dependencies + run: | + sudo apt-get install python3-opengl + python -m pip install --upgrade pip + pip install ".[test]" + - name: Test with pytest + run: | + pytest diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..63e4dba --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,57 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + + +[project] +name = "fastfiz-env" +description = "Gymnasium environments for FastFiz pool simulator." 
+readme = "README.md" +requires-python = ">=3.10" +dynamic = ["version"] +dependencies = [ + "fastfiz @ git+https://github.com/P6-Pool/fastfiz.git@2af8aed22bec1faeb5ac92b98b0751a0023f3fb7", + "gymnasium", + "numpy", + "vectormath", +] + +[project.optional-dependencies] +dev = [ + "fastfiz_renderer @ git+https://github.com/P6-Pool/fastfiz-renderer.git@4ffb95e8683b30975b1d8f5dd483d56599e1e062", + "stable-baselines3", + "tqdm", + "rich", + "torch", + "tensorboard", + "optuna", +] +test = ["pytest", "mypy", "ruff"] +all = [ + # dev + "fastfiz_renderer @ git+https://github.com/P6-Pool/fastfiz-renderer.git@4ffb95e8683b30975b1d8f5dd483d56599e1e062", + "stable-baselines3", + "tqdm", + "rich", + "torch", + "tensorboard", + "optuna", + # test + "pytest", + "mypy", + "ruff", +] + +[tool.pytest.ini_options] +filterwarnings = [ + "ignore::DeprecationWarning:tensorboard", + "ignore::UserWarning:gym", +] + +[tool.mypy] +ignore_missing_imports = true +follow_imports = "silent" +show_error_codes = true + +[tool.ruff] +line-length = 127 diff --git a/setup.py b/setup.py index 31ad11f..1d8cae7 100644 --- a/setup.py +++ b/setup.py @@ -1,15 +1,23 @@ +import re from setuptools import setup, find_packages -with open("requirements.txt") as f: - requirements = f.read().splitlines() +# with open("requirements.txt") as f: +# requirements = f.read().splitlines() + + +def get_version(): + with open("src/fastfiz_env/__init__.py", "r") as f: + for line in f: + match = re.match(r"__version__\s*=\s*['\"]([^'\"]+)['\"]", line) + if match: + return match.group(1) + raise RuntimeError("Version not found in __init__.py") + setup( - name="fastfiz-env", - description="Gymnasium environment for FastFiz pool simulator", - version="0.0.1", - license="MIT", - install_requires=requirements, - test_requires=["pytest"], - packages=find_packages(where="src"), - package_dir={"": "src"}, + version=get_version(), + # install_requires=requirements, + # test_requires=["pytest"], + # packages=find_packages(where="src"), + # package_dir={"": "src"}, ) From fe87f5af50ea3ee0f654478b2e7cbfcf40ffebf4 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 15:10:08 +0200 Subject: [PATCH 08/44] Add options arg to envs --- src/fastfiz_env/envs/pockets_fastfiz.py | 10 +++++++--- src/fastfiz_env/envs/simple_fastfiz.py | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/fastfiz_env/envs/pockets_fastfiz.py b/src/fastfiz_env/envs/pockets_fastfiz.py index de94bb7..e635cbf 100644 --- a/src/fastfiz_env/envs/pockets_fastfiz.py +++ b/src/fastfiz_env/envs/pockets_fastfiz.py @@ -28,6 +28,7 @@ def __init__( *, reward_function: RewardFunction = DefaultReward, num_balls: int = 16, + options: Optional[dict] = None, ) -> None: super().__init__() self.num_balls = num_balls @@ -37,6 +38,8 @@ def __init__( self.reward = reward_function self.max_episode_steps = None self.elapsed_steps = None + self.options = options or {} + self._quick_terminate = self.options.get("quick_terminate", False) def _get_time_limit_attrs(self): try: @@ -148,10 +151,11 @@ def _possible_shot(self, shot_params: ff.ShotParams) -> bool: return possible_shot(self.table_state, shot_params) def _is_terminal_state(self) -> bool: - pocketed = num_balls_pocketed(self.table_state) + if self._quick_terminate: + pocketed = num_balls_pocketed(self.table_state) - if pocketed <= self._prev_pocketed: - return True + if pocketed <= self._prev_pocketed: + return True return terminal_state(self.table_state) diff --git a/src/fastfiz_env/envs/simple_fastfiz.py 
b/src/fastfiz_env/envs/simple_fastfiz.py index a9f8875..b316356 100644 --- a/src/fastfiz_env/envs/simple_fastfiz.py +++ b/src/fastfiz_env/envs/simple_fastfiz.py @@ -26,6 +26,7 @@ def __init__( *, reward_function: RewardFunction = DefaultReward, num_balls: int = 16, + options: Optional[dict] = None, ) -> None: super().__init__() self.num_balls = num_balls @@ -35,6 +36,8 @@ def __init__( self.reward = reward_function self.max_episode_steps = None self.elapsed_steps = None + self.options = options or {} + self._quick_terminate = self.options.get("quick_terminate", False) def _get_time_limit_attrs(self): try: @@ -131,10 +134,11 @@ def _possible_shot(self, shot_params: ff.ShotParams) -> bool: return possible_shot(self.table_state, shot_params) def _is_terminal_state(self) -> bool: - pocketed = num_balls_pocketed(self.table_state) + if self._quick_terminate: + pocketed = num_balls_pocketed(self.table_state) - if pocketed <= self._prev_pocketed: - return True + if pocketed <= self._prev_pocketed: + return True return terminal_state(self.table_state) From e537e545cc7bf7aa1b6fe77acbf144ed765aebdf Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 15:32:10 +0200 Subject: [PATCH 09/44] Add env options arg --- src/fastfiz_env/make.py | 13 +++++++++++-- src/optimize.py | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/src/fastfiz_env/make.py b/src/fastfiz_env/make.py index 9b13b4e..14d315b 100644 --- a/src/fastfiz_env/make.py +++ b/src/fastfiz_env/make.py @@ -1,3 +1,4 @@ +from typing import Optional from gymnasium.envs.registration import EnvSpec import gymnasium as gym @@ -39,7 +40,11 @@ def make( def make_wrapped_env( - env_id: str, num_balls: int, max_episode_steps: int, reward_function: RewardFunction + env_id: str, + num_balls: int, + max_episode_steps: int, + reward_function: RewardFunction, + **kwargs, ): """ Create an instance of the specified environment with the FastFizActionWrapper. @@ -50,6 +55,7 @@ def make_wrapped_env( num_balls=num_balls, max_episode_steps=max_episode_steps, disable_env_checker=False, + **kwargs, ) env = FastFizActionWrapper(env, action_space_id=ActionSpaces.NO_OFFSET_3D) return env @@ -60,6 +66,7 @@ def make_callable_wrapped_env( num_balls: int, max_episode_steps: int, reward_function: RewardFunction, + **kwargs, ): """ Create a callable function that returns an instance of the specified environment with the FastFizActionWrapper. @@ -67,6 +74,8 @@ def make_callable_wrapped_env( """ def _init() -> gym.Env: - return make_wrapped_env(env_id, num_balls, max_episode_steps, reward_function) + return make_wrapped_env( + env_id, num_balls, max_episode_steps, reward_function, **kwargs + ) return _init diff --git a/src/optimize.py b/src/optimize.py index 222728b..0de08dc 100644 --- a/src/optimize.py +++ b/src/optimize.py @@ -135,13 +135,14 @@ def objective( n_timesteps: int, start_time: str, no_logs: bool, + env_kwargs: dict, ) -> float: kwargs = sample_ppo_params(trial) N_ENVS = 4 env = make_vec_env( make_callable_wrapped_env( - env_id, num_balls, max_episode_steps, reward_function + env_id, num_balls, max_episode_steps, reward_function, **env_kwargs ), n_envs=N_ENVS, ) @@ -206,6 +207,28 @@ def save_trial(trial: optuna.trial.FrozenTrial, path: str) -> None: ) +class StoreDict(argparse.Action): + """ + Custom argparse action for storing dict. 
+ + In: args1:0.0 args2:"dict(a=1)" + Out: {'args1': 0.0, arg2: dict(a=1)} + """ + + def __init__(self, option_strings, dest, nargs=None, **kwargs): + self._nargs = nargs + super().__init__(option_strings, dest, nargs=nargs, **kwargs) + + def __call__(self, parser, namespace, values, option_string=None): + arg_dict = {} + for arguments in values: # type: ignore + key = arguments.split(":")[0] + value = ":".join(arguments.split(":")[1:]) + # Evaluate the string as python code + arg_dict[key] = eval(value) + setattr(namespace, self.dest, arg_dict) + + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Description of your program") parser.add_argument("--n_trials", type=int, default=20, help="Number of trials") @@ -249,6 +272,15 @@ def save_trial(trial: optuna.trial.FrozenTrial, path: str) -> None: "--no-logs", action="store_true", help="Disable Tensorboard logging" ) + parser.add_argument( + "--env-options", + type=str, + nargs="+", + action=StoreDict, + help="Optional keyword argument to pass to the env constructor", + default={}, + ) + args = parser.parse_args() # Set pytorch num threads to 1 for faster training. @@ -265,6 +297,8 @@ def save_trial(trial: optuna.trial.FrozenTrial, path: str) -> None: reward_function = DefaultReward if args.reward == "DefaultReward" else WinningReward + env_kwargs = {"options": args.env_options} + def obj_fn(trial): return objective( trial, @@ -277,6 +311,7 @@ def obj_fn(trial): args.n_timesteps, start_time, args.no_logs, + env_kwargs, ) try: From bb9a25c93fb0bec0606d63c909d8ce6c8f5a7bca Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 18:47:16 +0200 Subject: [PATCH 10/44] Fix assert value --- src/tests/envs/test_envs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/envs/test_envs.py b/src/tests/envs/test_envs.py index df6bb51..5ae81f7 100644 --- a/src/tests/envs/test_envs.py +++ b/src/tests/envs/test_envs.py @@ -29,7 +29,7 @@ def test_step(self): obs, reward, done, truncated, info = env.step(action) self.assertEqual(obs.shape, (16, 2)) self.assertEqual(reward, 1) - self.assertEqual(done, True) # Will terminate as no balls were pocketed + self.assertEqual(done, False) self.assertEqual(truncated, False) self.assertEqual(info, {"is_success": False}) From 368e7a0a49b48eebb0b4f7a338820409a7814396 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 18:47:39 +0200 Subject: [PATCH 11/44] Remove p5 import --- src/fastfiz_env/utils/fastfiz/renderer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fastfiz_env/utils/fastfiz/renderer.py b/src/fastfiz_env/utils/fastfiz/renderer.py index cc2bcc1..43c084f 100644 --- a/src/fastfiz_env/utils/fastfiz/renderer.py +++ b/src/fastfiz_env/utils/fastfiz/renderer.py @@ -1,4 +1,3 @@ -from p5 import * import fastfiz as ff import vectormath as vmath From fb98c9628e766a884a9e062c8e844a69217b80cc Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 18:48:20 +0200 Subject: [PATCH 12/44] Add swig and gsl to dependencies --- .github/workflows/test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4c60f76..7287df4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -25,7 +25,8 @@ jobs: cache: 'pip' - name: Install dependencies run: | - sudo apt-get install python3-opengl + sudo apt update + sudo apt install python3-opengl swig libgsl-dev python -m pip install --upgrade pip pip install ".[test]" - name: Test with pytest From 
bcf7a07379d625312d16c04e1e0e273a041ff4c5 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 18:48:39 +0200 Subject: [PATCH 13/44] Format with Ruff --- src/eval.py | 12 +-- src/fastfiz_env/envs/frames_fastfiz.py | 24 ++--- src/fastfiz_env/envs/pockets_fastfiz.py | 8 +- src/fastfiz_env/envs/simple_fastfiz.py | 4 +- src/fastfiz_env/envs/testing_fastfiz.py | 21 +---- src/fastfiz_env/envs/utils.py | 4 +- src/fastfiz_env/make.py | 4 +- .../reward_functions/combined_reward.py | 10 +- .../delta_best_total_distance_reward.py | 8 +- .../common/impossible_shot_reward.py | 5 +- .../common/total_distance_reward.py | 4 +- .../reward_functions/common/weights.py | 36 ++----- .../reward_functions/reward_function.py | 8 +- src/fastfiz_env/utils/fastfiz/fastfiz.py | 69 ++++---------- src/fastfiz_env/utils/fastfiz/renderer.py | 40 ++------ src/fastfiz_env/wrappers/action.py | 25 ++--- .../wrappers/time_limit_injection.py | 4 +- src/optimize.py | 52 +++-------- src/tests/utils/test_features.py | 1 - src/tests/utils/test_reward_functions.py | 93 +++++-------------- src/train.py | 12 +-- 21 files changed, 109 insertions(+), 335 deletions(-) diff --git a/src/eval.py b/src/eval.py index f481cf2..1ace5d3 100644 --- a/src/eval.py +++ b/src/eval.py @@ -85,13 +85,9 @@ def decide_shot(self, table_state: ff.TableState) -> Optional[ff.ShotParams]: for _ in range(10): if isinstance(self.env, FramesFastFiz): if self.prev_ts is None: - obs = self.env.compute_observation( - table_state, table_state, self.shot - ) + obs = self.env.compute_observation(table_state, table_state, self.shot) else: - obs = self.env.compute_observation( - self.prev_ts, table_state, self.shot - ) + obs = self.env.compute_observation(self.prev_ts, table_state, self.shot) elif isinstance(self.env, PocketsFastFiz): obs = self.env.compute_observation(table_state) else: @@ -114,9 +110,7 @@ def main() -> None: parser.add_argument("-m", "--model", type=str, help="Path to the model file") args = parser.parse_args() - assert args.model is not None and os.path.exists( - args.model - ), f"Model file not found: {args.model}" + assert args.model is not None and os.path.exists(args.model), f"Model file not found: {args.model}" model = PPO.load(args.model) diff --git a/src/fastfiz_env/envs/frames_fastfiz.py b/src/fastfiz_env/envs/frames_fastfiz.py index eb586ce..d18f464 100644 --- a/src/fastfiz_env/envs/frames_fastfiz.py +++ b/src/fastfiz_env/envs/frames_fastfiz.py @@ -28,9 +28,7 @@ class FramesFastFiz(gym.Env): TOTAL_BALLS = 16 # Including the cue ball num_balls = 2 - def __init__( - self, reward_function: RewardFunction = DefaultReward, num_balls: int = 16 - ) -> None: + def __init__(self, reward_function: RewardFunction = DefaultReward, num_balls: int = 16) -> None: super().__init__() if num_balls < 2: warnings.warn( @@ -47,15 +45,11 @@ def __init__( def _max_episode_steps(self): if self.get_wrapper_attr("_time_limit_max_episode_steps") is not None: - self.max_episode_steps = self.get_wrapper_attr( - "_time_limit_max_episode_steps" - ) + self.max_episode_steps = self.get_wrapper_attr("_time_limit_max_episode_steps") print(f"Setting max episode steps to {self.max_episode_steps}") self.reward.max_episode_steps = self.max_episode_steps - def reset( - self, *, seed: Optional[int] = None, options: Optional[dict] = None - ) -> tuple[np.ndarray, dict]: + def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple[np.ndarray, dict]: """ Reset the environment to its initial state. 
""" @@ -187,14 +181,9 @@ def _possible_shot(self, shot_params: ff.ShotParams) -> bool: """ Check if the shot is possible. """ - return ( - self.table_state.isPhysicallyPossible(shot_params) - == ff.TableState.OK_PRECONDITION - ) + return self.table_state.isPhysicallyPossible(shot_params) == ff.TableState.OK_PRECONDITION - def _compute_observation( - self, prev_table_state: ff.TableState, shot: Optional[ff.Shot] - ) -> np.ndarray: + def _compute_observation(self, prev_table_state: ff.TableState, shot: Optional[ff.Shot]) -> np.ndarray: return self.compute_observation(prev_table_state, self.table_state, shot) @classmethod @@ -236,8 +225,7 @@ def compute_observation( pocketed = is_pocketed_state(gb.state) frames_seq[frame][gb.number] = [ *normalize_ball_positions((gb.position.x, gb.position.y)), # type: ignore - normalize_ball_velocity(np.hypot(gb.velocity.x, gb.velocity.y)) * 2 - - 1, + normalize_ball_velocity(np.hypot(gb.velocity.x, gb.velocity.y)) * 2 - 1, pocketed, ] return frames_seq diff --git a/src/fastfiz_env/envs/pockets_fastfiz.py b/src/fastfiz_env/envs/pockets_fastfiz.py index e635cbf..ae7c9bf 100644 --- a/src/fastfiz_env/envs/pockets_fastfiz.py +++ b/src/fastfiz_env/envs/pockets_fastfiz.py @@ -54,9 +54,7 @@ def _get_time_limit_attrs(self): self.reward.max_episode_steps = self.max_episode_steps - def reset( - self, *, seed: Optional[int] = None, options: Optional[dict] = None - ) -> tuple[np.ndarray, dict]: + def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple[np.ndarray, dict]: super().reset(seed=seed) if self.max_episode_steps is None or self.elapsed_steps is None: @@ -66,9 +64,7 @@ def reset( self.reward.reset(self.table_state) self._prev_pocketed = 0 - self._pocket_centers = normalize_ball_positions( - pocket_centers(self.table_state) - ) + self._pocket_centers = normalize_ball_positions(pocket_centers(self.table_state)) observation = self._get_observation() info = self._get_info() diff --git a/src/fastfiz_env/envs/simple_fastfiz.py b/src/fastfiz_env/envs/simple_fastfiz.py index b316356..f1105d2 100644 --- a/src/fastfiz_env/envs/simple_fastfiz.py +++ b/src/fastfiz_env/envs/simple_fastfiz.py @@ -52,9 +52,7 @@ def _get_time_limit_attrs(self): self.reward.max_episode_steps = self.max_episode_steps - def reset( - self, *, seed: Optional[int] = None, options: Optional[dict] = None - ) -> tuple[np.ndarray, dict]: + def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple[np.ndarray, dict]: super().reset(seed=seed) if self.max_episode_steps is None or self.elapsed_steps is None: diff --git a/src/fastfiz_env/envs/testing_fastfiz.py b/src/fastfiz_env/envs/testing_fastfiz.py index b47dddd..2265180 100644 --- a/src/fastfiz_env/envs/testing_fastfiz.py +++ b/src/fastfiz_env/envs/testing_fastfiz.py @@ -68,14 +68,10 @@ def __init__( def _max_episode_steps(self): if self.get_wrapper_attr("_time_limit_max_episode_steps") is not None: - self.max_episode_steps = self.get_wrapper_attr( - "_time_limit_max_episode_steps" - ) + self.max_episode_steps = self.get_wrapper_attr("_time_limit_max_episode_steps") self.reward.max_episode_steps = self.max_episode_steps - def reset( - self, *, seed: Optional[int] = None, options: Optional[dict] = None - ) -> tuple[np.ndarray, dict]: + def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple[np.ndarray, dict]: super().reset(seed=seed) if self.max_episode_steps is None: @@ -96,9 +92,7 @@ def reset( observation = self._get_observation() info = self._get_info() - 
self.logger.info( - "Reset(%s) - initial observation:\n%s", self.n_episodes, observation - ) + self.logger.info("Reset(%s) - initial observation:\n%s", self.n_episodes, observation) self.logger.info("Reset(%s) - initial info: %s", self.n_episodes, info) self.n_episodes += 1 @@ -156,9 +150,7 @@ def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, bool, dict] def _get_observation(self): ball_positions = get_ball_positions(self.table_state)[: self.TOTAL_BALLS] # ball_positions = normalize_ball_positions(ball_positions) # Normalize to [0, 1] - ball_positions = ( - normalize_ball_positions(ball_positions) * 2 - 1 - ) # Normalize to [-1, 1] (symmetric) + ball_positions = normalize_ball_positions(ball_positions) * 2 - 1 # Normalize to [-1, 1] (symmetric) observation = np.zeros((self.TOTAL_BALLS, 2), dtype=np.float32) for i, ball_pos in enumerate(ball_positions): observation[i] = [*ball_pos] @@ -224,7 +216,4 @@ def _possible_shot(self, shot_params: ff.ShotParams) -> bool: """ Check if the shot is possible. """ - return ( - self.table_state.isPhysicallyPossible(shot_params) - == ff.TableState.OK_PRECONDITION - ) + return self.table_state.isPhysicallyPossible(shot_params) == ff.TableState.OK_PRECONDITION diff --git a/src/fastfiz_env/envs/utils.py b/src/fastfiz_env/envs/utils.py index ffc27e1..2e1ccb4 100644 --- a/src/fastfiz_env/envs/utils.py +++ b/src/fastfiz_env/envs/utils.py @@ -26,6 +26,4 @@ def possible_shot(table_state: ff.TableState, shot_params: ff.ShotParams) -> boo """ Check if the shot is possible. """ - return ( - table_state.isPhysicallyPossible(shot_params) == ff.TableState.OK_PRECONDITION - ) + return table_state.isPhysicallyPossible(shot_params) == ff.TableState.OK_PRECONDITION diff --git a/src/fastfiz_env/make.py b/src/fastfiz_env/make.py index 14d315b..0b46333 100644 --- a/src/fastfiz_env/make.py +++ b/src/fastfiz_env/make.py @@ -74,8 +74,6 @@ def make_callable_wrapped_env( """ def _init() -> gym.Env: - return make_wrapped_env( - env_id, num_balls, max_episode_steps, reward_function, **kwargs - ) + return make_wrapped_env(env_id, num_balls, max_episode_steps, reward_function, **kwargs) return _init diff --git a/src/fastfiz_env/reward_functions/combined_reward.py b/src/fastfiz_env/reward_functions/combined_reward.py index f86c854..eb4bb4b 100644 --- a/src/fastfiz_env/reward_functions/combined_reward.py +++ b/src/fastfiz_env/reward_functions/combined_reward.py @@ -75,14 +75,12 @@ def reward( total_reward += reward if issubclass(reward_function.__class__, BinaryReward): - if ( - reward == 1 * reward_function.weight() - and self.short_circuit - and reward_function.short_circuit - ): + if reward == 1 * reward_function.weight() and self.short_circuit and reward_function.short_circuit: return total_reward return total_reward def __str__(self) -> str: - return f"CombinedReward({[str(reward) for reward in self.reward_functions]}, {None}, short_circuit={self.short_circuit})" + return ( + f"CombinedReward({[str(reward) for reward in self.reward_functions]}, {None}, short_circuit={self.short_circuit})" + ) diff --git a/src/fastfiz_env/reward_functions/common/delta_best_total_distance_reward.py b/src/fastfiz_env/reward_functions/common/delta_best_total_distance_reward.py index 97cba01..eead78a 100644 --- a/src/fastfiz_env/reward_functions/common/delta_best_total_distance_reward.py +++ b/src/fastfiz_env/reward_functions/common/delta_best_total_distance_reward.py @@ -19,9 +19,7 @@ def reset(self, table_state) -> None: self.pockets = pocket_centers(table_state) # num_balls = 
num_balls_in_play(table_state) ball_positions = get_ball_positions(table_state)[1 : self.num_balls] - self.min_total_dist = np.sum( - distances_to_closest_pocket(ball_positions, self.pockets) - ) + self.min_total_dist = np.sum(distances_to_closest_pocket(ball_positions, self.pockets)) def reward( self, @@ -34,9 +32,7 @@ def reward( """ num_balls = num_balls_in_play(table_state) ball_positions = get_ball_positions(table_state)[1:num_balls] - new_total_dist = np.sum( - distances_to_closest_pocket(ball_positions, self.pockets) - ) + new_total_dist = np.sum(distances_to_closest_pocket(ball_positions, self.pockets)) reward = float(self.min_total_dist - new_total_dist) diff --git a/src/fastfiz_env/reward_functions/common/impossible_shot_reward.py b/src/fastfiz_env/reward_functions/common/impossible_shot_reward.py index 31878d2..d744eb5 100644 --- a/src/fastfiz_env/reward_functions/common/impossible_shot_reward.py +++ b/src/fastfiz_env/reward_functions/common/impossible_shot_reward.py @@ -18,8 +18,5 @@ def reward( Reward function returns 1 if the shot is impossible, 0 otherwise. """ shot_params = ff.ShotParams(*action) - impossible_shot = ( - table_state.isPhysicallyPossible(shot_params) - != ff.TableState.OK_PRECONDITION - ) + impossible_shot = table_state.isPhysicallyPossible(shot_params) != ff.TableState.OK_PRECONDITION return 1 if impossible_shot else 0 diff --git a/src/fastfiz_env/reward_functions/common/total_distance_reward.py b/src/fastfiz_env/reward_functions/common/total_distance_reward.py index 9d2aac2..30e05ac 100644 --- a/src/fastfiz_env/reward_functions/common/total_distance_reward.py +++ b/src/fastfiz_env/reward_functions/common/total_distance_reward.py @@ -29,7 +29,5 @@ def reward( """ num_balls = num_balls_in_play(table_state) ball_positions = get_ball_positions(table_state)[1:num_balls] - total_distance = np.sum( - distances_to_closest_pocket(ball_positions, self.pockets) - ) + total_distance = np.sum(distances_to_closest_pocket(ball_positions, self.pockets)) return total_distance diff --git a/src/fastfiz_env/reward_functions/common/weights.py b/src/fastfiz_env/reward_functions/common/weights.py index 586fbc0..6b0622d 100644 --- a/src/fastfiz_env/reward_functions/common/weights.py +++ b/src/fastfiz_env/reward_functions/common/weights.py @@ -5,53 +5,35 @@ NegativeConstantWeight = -ConstantWeight -def ConstantWeightMaxSteps( - num_balls: int, current_step: int, max_steps: int | None -) -> float: +def ConstantWeightMaxSteps(num_balls: int, current_step: int, max_steps: int | None) -> float: assert max_steps is not None, "Max steps must be defined for ConstantWeightMaxSteps" return ConstantWeight / max_steps -def NegativeConstantWeightMaxSteps( - num_balls: int, current_step: int, max_steps: int | None -) -> float: - assert ( - max_steps is not None - ), "Max steps must be defined for NegativeConstantWeightMaxSteps" +def NegativeConstantWeightMaxSteps(num_balls: int, current_step: int, max_steps: int | None) -> float: + assert max_steps is not None, "Max steps must be defined for NegativeConstantWeightMaxSteps" return NegativeConstantWeight / max_steps -def ConstantWeightNumBalls( - num_balls: int, current_step: int, max_steps: int | None -) -> float: +def ConstantWeightNumBalls(num_balls: int, current_step: int, max_steps: int | None) -> float: return ConstantWeight / num_balls -def NegativeConstantWeightNumBalls( - num_balls: int, current_step: int, max_steps: int | None -) -> float: +def NegativeConstantWeightNumBalls(num_balls: int, current_step: int, max_steps: int | None) -> 
float: return NegativeConstantWeight / num_balls -def ConstantWeightBalls( - num_balls: int, current_step: int, max_steps: int | None -) -> float: +def ConstantWeightBalls(num_balls: int, current_step: int, max_steps: int | None) -> float: return ConstantWeight / (num_balls - 1) -def NegativeConstantWeightBalls( - num_balls: int, current_step: int, max_steps: int | None -) -> float: +def NegativeConstantWeightBalls(num_balls: int, current_step: int, max_steps: int | None) -> float: return NegativeConstantWeight / (num_balls - 1) -def ConstantWeightCurrentStep( - num_balls: int, current_step: int, max_steps: int | None -) -> float: +def ConstantWeightCurrentStep(num_balls: int, current_step: int, max_steps: int | None) -> float: return ConstantWeight / current_step -def NegativeConstantWeightCurrentStep( - num_balls: int, current_step: int, max_steps: int | None -) -> float: +def NegativeConstantWeightCurrentStep(num_balls: int, current_step: int, max_steps: int | None) -> float: return NegativeConstantWeight / current_step diff --git a/src/fastfiz_env/reward_functions/reward_function.py b/src/fastfiz_env/reward_functions/reward_function.py index f55e5cf..32545dd 100644 --- a/src/fastfiz_env/reward_functions/reward_function.py +++ b/src/fastfiz_env/reward_functions/reward_function.py @@ -81,9 +81,7 @@ def get_reward( float: The calculated reward value. """ if not self.__reset_called: - raise RuntimeError( - f"{self.__class__.__name__} reset() method must be called before calling get_reward()." - ) + raise RuntimeError(f"{self.__class__.__name__} reset() method must be called before calling get_reward().") self.current_step += 1 return self.reward(prev_table_state, table_state, action) * self.weight() @@ -115,9 +113,7 @@ def weight( float: The weight of the reward function. """ if callable(self.__weight): - return self.__weight( - self.num_balls, self.current_step, self.max_episode_steps - ) + return self.__weight(self.num_balls, self.current_step, self.max_episode_steps) return self.__weight def __str__(self): diff --git a/src/fastfiz_env/utils/fastfiz/fastfiz.py b/src/fastfiz_env/utils/fastfiz/fastfiz.py index 8a5f4ad..d4c936c 100644 --- a/src/fastfiz_env/utils/fastfiz/fastfiz.py +++ b/src/fastfiz_env/utils/fastfiz/fastfiz.py @@ -44,13 +44,7 @@ def num_balls_in_play(table_state: ff.TableState) -> int: Returns: int: The number of balls in play. """ - return len( - [ - i - for i in range(table_state.getNumBalls()) - if table_state.getBall(i).isInPlay() - ] - ) + return len([i for i in range(table_state.getNumBalls()) if table_state.getBall(i).isInPlay()]) def num_balls_pocketed( @@ -71,14 +65,10 @@ def num_balls_pocketed( int: The number of balls pocketed. """ stop = table_state.getNumBalls() if range_stop is None else range_stop - return len( - [i for i in range(range_start, stop) if table_state.getBall(i).isPocketed()] - ) + return len([i for i in range(range_start, stop) if table_state.getBall(i).isPocketed()]) -def any_ball_has_moved( - prev_ball_positions: np.ndarray, ball_positions: np.ndarray -) -> bool: +def any_ball_has_moved(prev_ball_positions: np.ndarray, ball_positions: np.ndarray) -> bool: """ Check if any ball has moved by comparing the previous ball positions with the current ball positions. 
@@ -176,9 +166,7 @@ def distance_to_closest_pocket(ball_position: np.ndarray, pockets: np.ndarray) - return np.min(distance_to_pockets(ball_position, pockets)) -def distances_to_closest_pocket( - ball_positions: np.ndarray, pockets: np.ndarray -) -> np.ndarray: +def distances_to_closest_pocket(ball_positions: np.ndarray, pockets: np.ndarray) -> np.ndarray: """ Calculates the distances from each ball position to the closest pocket. @@ -189,12 +177,7 @@ def distances_to_closest_pocket( Returns: np.ndarray: An array of distances from each ball position to the closest pocket. """ - return np.array( - [ - distance_to_closest_pocket(ball_position, pockets) - for ball_position in ball_positions - ] - ) + return np.array([distance_to_closest_pocket(ball_position, pockets) for ball_position in ball_positions]) def create_table_state(n_balls: int) -> ff.TableState: @@ -223,9 +206,7 @@ def create_table_state(n_balls: int) -> ff.TableState: return table_state -def create_random_table_state( - n_balls: int, seed: Optional[int] = None -) -> ff.TableState: +def create_random_table_state(n_balls: int, seed: Optional[int] = None) -> ff.TableState: """ Creates a random table state with the specified number of balls. @@ -241,9 +222,7 @@ def create_random_table_state( return table_state -def randomize_table_state( - table_state: ff.TableState, seed: Optional[int] = None -) -> None: +def randomize_table_state(table_state: ff.TableState, seed: Optional[int] = None) -> None: """ Randomizes the positions of the balls on the pool table within the given table state. @@ -297,9 +276,7 @@ def randomize_table_state( return table_state -def map_action_to_shot_params( - table_state: ff.TableState, action: np.ndarray -) -> np.ndarray: +def map_action_to_shot_params(table_state: ff.TableState, action: np.ndarray) -> np.ndarray: """ Maps the given action values to the corresponding shot parameters within the specified ranges. @@ -313,17 +290,13 @@ def map_action_to_shot_params( """ a = np.interp(action[0], [0, 0], [0, 0]) b = np.interp(action[1], [0, 0], [0, 0]) - theta = np.interp( - action[2], [-1, 1], [table_state.MIN_THETA, table_state.MAX_THETA - 0.001] - ) + theta = np.interp(action[2], [-1, 1], [table_state.MIN_THETA, table_state.MAX_THETA - 0.001]) phi = np.interp(action[3], [-1, 1], [0, 360]) v = np.interp(action[4], [-1, 1], [0, table_state.MAX_VELOCITY - 0.001]) return np.array([0, 0, theta, phi, v], dtype=np.float64) -def shot_params_from_action( - table_state: ff.TableState, action: np.ndarray -) -> ff.ShotParams: +def shot_params_from_action(table_state: ff.TableState, action: np.ndarray) -> ff.ShotParams: """ Converts an action into shot parameters. 
@@ -349,21 +322,11 @@ def action_to_shot(action: np.ndarray, action_space: spaces.Box) -> ff.ShotParam MAX_THETA = ff.TableState.MAX_THETA MAX_VELOCITY = ff.TableState.MAX_VELOCITY - a = np.interp( - action[0], [action_space.low[0], action_space.high[0]], [MIN_OFFSET, MAX_OFFSET] - ) - b = np.interp( - action[1], [action_space.low[1], action_space.high[1]], [MIN_OFFSET, MAX_OFFSET] - ) - theta = np.interp( - action[2], [action_space.low[2], action_space.high[2]], [MIN_THETA, MAX_THETA] - ) - phi = np.interp( - action[3], [action_space.low[3], action_space.high[3]], [MIN_PHI, MAX_PHI] - ) - velocity = np.interp( - action[4], [action_space.low[4], action_space.high[4]], [0, MAX_VELOCITY] - ) + a = np.interp(action[0], [action_space.low[0], action_space.high[0]], [MIN_OFFSET, MAX_OFFSET]) + b = np.interp(action[1], [action_space.low[1], action_space.high[1]], [MIN_OFFSET, MAX_OFFSET]) + theta = np.interp(action[2], [action_space.low[2], action_space.high[2]], [MIN_THETA, MAX_THETA]) + phi = np.interp(action[3], [action_space.low[3], action_space.high[3]], [MIN_PHI, MAX_PHI]) + velocity = np.interp(action[4], [action_space.low[4], action_space.high[4]], [0, MAX_VELOCITY]) # print(f"a: {a}, b: {b}, theta: {theta}, phi: {phi}, velocity: {velocity}") @@ -371,7 +334,7 @@ def action_to_shot(action: np.ndarray, action_space: spaces.Box) -> ff.ShotParam def normalize_ball_positions( - ball_positions: np.ndarray[float, np.dtype[np.float32]] + ball_positions: np.ndarray[float, np.dtype[np.float32]], ) -> np.ndarray[float, np.dtype[np.float32]]: """ Normalize the ball positions to be within the range [0, 1]. diff --git a/src/fastfiz_env/utils/fastfiz/renderer.py b/src/fastfiz_env/utils/fastfiz/renderer.py index 43c084f..1a44b29 100644 --- a/src/fastfiz_env/utils/fastfiz/renderer.py +++ b/src/fastfiz_env/utils/fastfiz/renderer.py @@ -41,43 +41,25 @@ def update( time_since_event_start = time_since_shot_start - cur_state.e_time def calc_sliding_displacement(delta_time: float) -> vmath.Vector2: - rotational_velocity: vmath.Vector3 = GameBall.RADIUS * vmath.Vector3( - 0, 0, cur_state.ang_vel.z - ).cross(cur_state.ang_vel) - relative_velocity = cur_state.vel + vmath.Vector2( - rotational_velocity.x, rotational_velocity.y + rotational_velocity: vmath.Vector3 = GameBall.RADIUS * vmath.Vector3(0, 0, cur_state.ang_vel.z).cross( + cur_state.ang_vel ) + relative_velocity = cur_state.vel + vmath.Vector2(rotational_velocity.x, rotational_velocity.y) self.velocity = ( - cur_state.vel - - delta_time - * gravitational_const - * sliding_friction_const - * relative_velocity.normalize() + cur_state.vel - delta_time * gravitational_const * sliding_friction_const * relative_velocity.normalize() ) return ( cur_state.vel * delta_time - - 0.5 - * sliding_friction_const - * gravitational_const - * delta_time**2 - * relative_velocity.normalize() + - 0.5 * sliding_friction_const * gravitational_const * delta_time**2 * relative_velocity.normalize() ) def calc_rolling_displacement(delta_time: float) -> vmath.Vector2: self.velocity = ( - cur_state.vel - - gravitational_const - * rolling_friction_const - * delta_time - * cur_state.vel.copy().normalize() + cur_state.vel - gravitational_const * rolling_friction_const * delta_time * cur_state.vel.copy().normalize() ) return ( cur_state.vel * delta_time - - 0.5 - * rolling_friction_const - * gravitational_const - * delta_time**2 - * cur_state.vel.copy().normalize() + - 0.5 * rolling_friction_const * gravitational_const * delta_time**2 * cur_state.vel.copy().normalize() ) displacement = 
vmath.Vector2(0, 0) @@ -103,14 +85,10 @@ def _get_relevant_ball_states_from_shot(self, shot: ff.Shot): for event in shot.getEventList(): event: ff.Event if event.getBall1() == self.number: - new_ball_event = _BallState.from_event_and_ball( - event, event.getBall1Data() - ) + new_ball_event = _BallState.from_event_and_ball(event, event.getBall1Data()) relevant_states.append(new_ball_event) elif event.getBall2() == self.number: - new_ball_event = _BallState.from_event_and_ball( - event, event.getBall2Data() - ) + new_ball_event = _BallState.from_event_and_ball(event, event.getBall2Data()) relevant_states.append(new_ball_event) return relevant_states diff --git a/src/fastfiz_env/wrappers/action.py b/src/fastfiz_env/wrappers/action.py index 3185abb..bb59399 100644 --- a/src/fastfiz_env/wrappers/action.py +++ b/src/fastfiz_env/wrappers/action.py @@ -123,10 +123,7 @@ def __init__( self.action_space_id = action_space_id self.action_space = self.SPACES[action_space_id.name] - def action( - self, action: np.ndarray[float, np.dtype[np.float32]] - ) -> np.ndarray[float, np.dtype[np.float32]]: - + def action(self, action: np.ndarray[float, np.dtype[np.float32]]) -> np.ndarray[float, np.dtype[np.float32]]: # Offset a and b are always 0 offset_a = 0 offset_b = 0 @@ -143,9 +140,7 @@ def action( phi = vec_to_abs_deg(vec_phi) vec_velocity = vec_length(vec_theta + vec_phi) - velocity = np.interp( - vec_velocity, (0, 2), (self.MIN_VELOCITY, self.MAX_VELOCITY) - ) + velocity = np.interp(vec_velocity, (0, 2), (self.MIN_VELOCITY, self.MAX_VELOCITY)) case ActionSpaces.VECTOR_2D: if np.allclose(action, 0): @@ -167,9 +162,7 @@ def action( r, theta, phi = spherical_coordinates(action) theta = np.interp(theta, (0, 360), (self.MIN_THETA, self.MAX_THETA)) phi = np.interp(phi, (0, 360), (self.MIN_PHI, self.MAX_PHI)) - velocity = np.interp( - r, (0, np.sqrt(3)), (self.MIN_VELOCITY, self.MAX_VELOCITY) - ) + velocity = np.interp(r, (0, np.sqrt(3)), (self.MIN_VELOCITY, self.MAX_VELOCITY)) case ActionSpaces.NO_OFFSET_5D: if np.allclose(action, 0): @@ -181,21 +174,15 @@ def action( vec_phi = action[2:4] phi = vec_to_abs_deg(vec_phi) - velocity = np.interp( - action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY) - ) + velocity = np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) case ActionSpaces.NORM_PARAMS_5D: theta = np.interp(action[2], (-1, 1), (self.MIN_THETA, self.MAX_THETA)) phi = np.interp(action[3], (-1, 1), (self.MIN_PHI, self.MAX_PHI)) - velocity = np.interp( - action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY) - ) + velocity = np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) case ActionSpaces.NO_OFFSET_NORM_PARAMS_3D: theta = np.interp(action[0], (-1, 1), (self.MIN_THETA, self.MAX_THETA)) phi = np.interp(action[1], (-1, 1), (self.MIN_PHI, self.MAX_PHI)) - velocity = np.interp( - action[2], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY) - ) + velocity = np.interp(action[2], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) action = np.array([offset_a, offset_b, theta, phi, velocity]) return action diff --git a/src/fastfiz_env/wrappers/time_limit_injection.py b/src/fastfiz_env/wrappers/time_limit_injection.py index 40a348d..c91072e 100644 --- a/src/fastfiz_env/wrappers/time_limit_injection.py +++ b/src/fastfiz_env/wrappers/time_limit_injection.py @@ -32,7 +32,5 @@ def __init__(self, env): elapsed_steps = get_wrapper_attr(self.env, gym.wrappers.TimeLimit, "_elapsed_steps") # type: ignore # Inject the max_episode_steps attribute into the base environment. 
- inject_attribute_into_base_env( - self.env, "_max_episode_steps", max_episode_steps - ) + inject_attribute_into_base_env(self.env, "_max_episode_steps", max_episode_steps) inject_attribute_into_base_env(self.env, "_elapsed_steps", elapsed_steps) diff --git a/src/optimize.py b/src/optimize.py index 0de08dc..af61169 100644 --- a/src/optimize.py +++ b/src/optimize.py @@ -28,26 +28,16 @@ def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]: :return: """ batch_size = trial.suggest_categorical("batch_size", [8, 16, 32, 64, 128, 256, 512]) - n_steps = trial.suggest_categorical( - "n_steps", [8, 16, 32, 64, 128, 256, 512, 1024, 2048] - ) - gamma = trial.suggest_categorical( - "gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999] - ) + n_steps = trial.suggest_categorical("n_steps", [8, 16, 32, 64, 128, 256, 512, 1024, 2048]) + gamma = trial.suggest_categorical("gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999]) learning_rate = trial.suggest_float("learning_rate", 1e-5, 1, log=True) ent_coef = trial.suggest_float("ent_coef", 0.00000001, 0.1, log=True) clip_range = trial.suggest_categorical("clip_range", [0.1, 0.2, 0.3, 0.4]) n_epochs = trial.suggest_categorical("n_epochs", [1, 5, 10, 20]) - gae_lambda = trial.suggest_categorical( - "gae_lambda", [0.8, 0.9, 0.92, 0.95, 0.98, 0.99, 1.0] - ) - max_grad_norm = trial.suggest_categorical( - "max_grad_norm", [0.3, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 5] - ) + gae_lambda = trial.suggest_categorical("gae_lambda", [0.8, 0.9, 0.92, 0.95, 0.98, 0.99, 1.0]) + max_grad_norm = trial.suggest_categorical("max_grad_norm", [0.3, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 5]) vf_coef = trial.suggest_float("vf_coef", 0, 1) - net_arch_type = trial.suggest_categorical( - "net_arch_type", ["tiny", "small", "medium"] - ) + net_arch_type = trial.suggest_categorical("net_arch_type", ["tiny", "small", "medium"]) # Uncomment for gSDE (continuous actions) # log_std_init = trial.suggest_float("log_std_init", -4, 1) # Uncomment for gSDE (continuous action) @@ -56,9 +46,7 @@ def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]: ortho_init = trial.suggest_categorical("ortho_init", [False]) # ortho_init = trial.suggest_categorical('ortho_init', [False, True]) # activation_fn = trial.suggest_categorical('activation_fn', ['tanh', 'relu', 'elu', 'leaky_relu']) - activation_fn_name = trial.suggest_categorical( - "activation_fn_name", ["tanh", "relu"] - ) + activation_fn_name = trial.suggest_categorical("activation_fn_name", ["tanh", "relu"]) # lr_schedule = "constant" # Uncomment to enable learning rate schedule # lr_schedule = trial.suggest_categorical('lr_schedule', ['linear', 'constant']) @@ -141,9 +129,7 @@ def objective( N_ENVS = 4 env = make_vec_env( - make_callable_wrapped_env( - env_id, num_balls, max_episode_steps, reward_function, **env_kwargs - ), + make_callable_wrapped_env(env_id, num_balls, max_episode_steps, reward_function, **env_kwargs), n_envs=N_ENVS, ) @@ -232,9 +218,7 @@ def __call__(self, parser, namespace, values, option_string=None): if __name__ == "__main__": parser = argparse.ArgumentParser(description="Description of your program") parser.add_argument("--n_trials", type=int, default=20, help="Number of trials") - parser.add_argument( - "--n_startup_trials", type=int, default=5, help="Number of startup trials" - ) + parser.add_argument("--n_startup_trials", type=int, default=5, help="Number of startup trials") parser.add_argument( "--reward", type=str, @@ -242,18 +226,10 @@ def __call__(self, parser, namespace, values, option_string=None): 
default="DefaultReward", help="Reward function", ) - parser.add_argument( - "--n_timesteps", type=int, default=int(5e5), help="Number of timesteps" - ) - parser.add_argument( - "--num-balls", type=int, default=2, help="Number of balls in the environment" - ) - parser.add_argument( - "--eval_freq", type=int, default=10_000, help="Evaluation frequency" - ) - parser.add_argument( - "--n_eval_episodes", type=int, default=100, help="Number of evaluation episodes" - ) + parser.add_argument("--n_timesteps", type=int, default=int(5e5), help="Number of timesteps") + parser.add_argument("--num-balls", type=int, default=2, help="Number of balls in the environment") + parser.add_argument("--eval_freq", type=int, default=10_000, help="Evaluation frequency") + parser.add_argument("--n_eval_episodes", type=int, default=100, help="Number of evaluation episodes") parser.add_argument( "--env_id", type=str, @@ -268,9 +244,7 @@ def __call__(self, parser, namespace, values, option_string=None): default=20, help="Max episode steps for the environment", ) - parser.add_argument( - "--no-logs", action="store_true", help="Disable Tensorboard logging" - ) + parser.add_argument("--no-logs", action="store_true", help="Disable Tensorboard logging") parser.add_argument( "--env-options", diff --git a/src/tests/utils/test_features.py b/src/tests/utils/test_features.py index 5203c73..8a8e2e5 100644 --- a/src/tests/utils/test_features.py +++ b/src/tests/utils/test_features.py @@ -4,7 +4,6 @@ class TestFeatures(unittest.TestCase): - def test_deg_to_vec(self): self.assertTrue(np.allclose(deg_to_vec(0), [1, 0])) self.assertTrue(np.allclose(deg_to_vec(90), [0, 1])) diff --git a/src/tests/utils/test_reward_functions.py b/src/tests/utils/test_reward_functions.py index fe6dc30..7e215e3 100644 --- a/src/tests/utils/test_reward_functions.py +++ b/src/tests/utils/test_reward_functions.py @@ -14,9 +14,7 @@ def weight_fn(num_balls: int, current_step: int, max_steps: int | None) -> float class TestRewardFunctions(unittest.TestCase): - possible_shot_action = np.array( - [0, 0, ff.TableState.MAX_THETA - 0.001, 0, 0], dtype=np.float64 - ) + possible_shot_action = np.array([0, 0, ff.TableState.MAX_THETA - 0.001, 0, 0], dtype=np.float64) impossible_shot_action = np.array([0, 0, 0, 0, 0], dtype=np.float64) empty_action = np.array([], dtype=np.float64) @@ -27,12 +25,8 @@ def test_step_pocketed_reward(self): reward = StepPocketedReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 0 - ) - self.assertEqual( - reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 0) + self.assertEqual(reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1) def test_game_won_reward(self): table_state = create_table_state(3) @@ -45,26 +39,17 @@ def test_game_won_reward(self): reward = GameWonReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 0 - ) - self.assertEqual( - reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 0) + self.assertEqual(reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1) def test_constant_reward(self): - table_state = create_table_state(2) reward = ConstantReward(weight=weight_fn, max_episode_steps=10) reward.reset(table_state) - self.assertEqual( - 
reward.get_reward(table_state, table_state, self.empty_action), 2 + 1 + 10 - ) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 2 + 2 + 10 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 2 + 1 + 10) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 2 + 2 + 10) def test_cue_ball_pocketed_reward(self): table_state = create_table_state(2) @@ -74,12 +59,8 @@ def test_cue_ball_pocketed_reward(self): reward = CueBallPocketedReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 0 - ) - self.assertEqual( - reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 0) + self.assertEqual(reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1) def test_cue_ball_not_moved_reward(self): table_state = create_table_state(2) @@ -89,27 +70,17 @@ def test_cue_ball_not_moved_reward(self): reward = CueBallNotMovedReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 1 - ) - self.assertEqual( - reward.get_reward(table_state, table_state_moved, self.empty_action), 0 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 1) + self.assertEqual(reward.get_reward(table_state, table_state_moved, self.empty_action), 0) def test_impossible_shot_reward(self): table_state = create_table_state(2) reward = ImpossibleShotReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.possible_shot_action), 0 - ) - self.assertEqual( - reward.get_reward(table_state, table_state, self.impossible_shot_action), 1 - ) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 1 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.possible_shot_action), 0) + self.assertEqual(reward.get_reward(table_state, table_state, self.impossible_shot_action), 1) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 1) def test_delta_best_total_distance_reward(self): # Pocket: [0., 1.118] @@ -121,9 +92,7 @@ def test_delta_best_total_distance_reward(self): table_state = create_table_state(2) table_state.setBall(1, ff.Ball.STATIONARY, ff.Point(0.25, 1.118)) - self.assertEqual( - reward.get_reward(prev_table_state, prev_table_state, self.empty_action), 0 - ) + self.assertEqual(reward.get_reward(prev_table_state, prev_table_state, self.empty_action), 0) self.assertEqual( reward.get_reward(prev_table_state, table_state, self.empty_action), 0.25, @@ -173,9 +142,7 @@ def test_combined_reward(self): table_state.setBall(1, ff.Ball.STATIONARY, ff.Point(0.25, 1.118)) self.assertEqual( - reward_function.get_reward( - prev_table_state, table_state, self.empty_action - ), + reward_function.get_reward(prev_table_state, table_state, self.empty_action), 3.3, ) @@ -184,22 +151,16 @@ def test_binary_reward_no_short_circuit(self): ImpossibleShotReward(10, short_circuit=False), ConstantReward(5), ] - reward_function = CombinedReward( - reward_functions=rewards_functions, short_circuit=True - ) + reward_function = CombinedReward(reward_functions=rewards_functions, short_circuit=True) table_state = create_table_state(2) reward_function.reset(table_state) self.assertEqual( - reward_function.get_reward( - table_state, table_state, self.possible_shot_action - ), 
+ reward_function.get_reward(table_state, table_state, self.possible_shot_action), 5, ) self.assertEqual( - reward_function.get_reward( - table_state, table_state, self.impossible_shot_action - ), + reward_function.get_reward(table_state, table_state, self.impossible_shot_action), 15, ) @@ -208,30 +169,22 @@ def test_binary_reward_short_circuit(self): ImpossibleShotReward(10, short_circuit=True), ConstantReward(5), ] - reward_function = CombinedReward( - reward_functions=rewards_functions, short_circuit=True - ) + reward_function = CombinedReward(reward_functions=rewards_functions, short_circuit=True) table_state = create_table_state(2) reward_function.reset(table_state) self.assertEqual( - reward_function.get_reward( - table_state, table_state, self.possible_shot_action - ), + reward_function.get_reward(table_state, table_state, self.possible_shot_action), 5, ) self.assertEqual( - reward_function.get_reward( - table_state, table_state, self.impossible_shot_action - ), + reward_function.get_reward(table_state, table_state, self.impossible_shot_action), 10, ) def test_weights(self): table_state = create_table_state(3) - reward = ConstantReward( - weight=NegativeConstantWeightMaxSteps, max_episode_steps=10 - ) + reward = ConstantReward(weight=NegativeConstantWeightMaxSteps, max_episode_steps=10) reward.reset(table_state) self.assertEqual( diff --git a/src/train.py b/src/train.py index 4254eb6..ee48585 100644 --- a/src/train.py +++ b/src/train.py @@ -44,9 +44,7 @@ def train( params: Optional[dict] = None, ) -> None: env = make_vec_env( - make_callable_wrapped_env( - env_id, num_balls, max_episode_steps, reward_function - ), + make_callable_wrapped_env(env_id, num_balls, max_episode_steps, reward_function), n_envs=n_envs, ) @@ -55,9 +53,7 @@ def train( model_name = get_model_name(env_id, num_balls) if model_dir is None: - model = PPO( - "MlpPolicy", env, verbose=1, tensorboard_log=logs_path, **hyperparams - ) + model = PPO("MlpPolicy", env, verbose=1, tensorboard_log=logs_path, **hyperparams) else: model = PPO.load(model_dir, env=env, verbose=1, tensorboard_log=logs_path) pretrained_name = model_dir.split("/")[-1].rsplit(".zip", 1)[0] @@ -145,9 +141,7 @@ def train( assert os.path.exists(logs_path), f"params path does not exist: {logs_path}" with open(params_path, "r") as fp: params = json.load(fp) - assert ( - "params" in params - ), "params file must have key 'params' with dict of hyperparameters" + assert "params" in params, "params file must have key 'params' with dict of hyperparameters" params = params["params"] print( From 1a5d2e02a98b889d41b2771ddfe7b957c6df5556 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 18:48:58 +0200 Subject: [PATCH 14/44] Lint with Ruff --- setup.py | 2 +- src/eval.py | 9 ++------- src/fastfiz_env/envs/frames_fastfiz.py | 2 -- src/fastfiz_env/envs/simple_fastfiz.py | 2 +- src/fastfiz_env/envs/testing_fastfiz.py | 2 -- src/fastfiz_env/make.py | 1 - src/fastfiz_env/reward_functions/binary_reward.py | 2 +- .../reward_functions/common/constant_reward.py | 2 +- src/fastfiz_env/reward_functions/common/weights.py | 1 - src/optimize.py | 2 -- src/tests/utils/test_reward_functions.py | 1 - src/train.py | 4 ++-- 12 files changed, 8 insertions(+), 22 deletions(-) diff --git a/setup.py b/setup.py index 1d8cae7..8ada4c4 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ import re -from setuptools import setup, find_packages +from setuptools import setup # with open("requirements.txt") as f: # requirements = f.read().splitlines() diff --git a/src/eval.py 
b/src/eval.py index 1ace5d3..7f64133 100644 --- a/src/eval.py +++ b/src/eval.py @@ -2,22 +2,17 @@ import os from fastfiz_renderer import GameHandler import numpy as np -import fastfiz_env -from fastfiz_env.envs import FramesFastFiz, SimpleFastFiz, PocketsFastFiz -from fastfiz_env.reward_functions import reward_function -from fastfiz_env.reward_functions.default_reward import DefaultReward +from fastfiz_env.envs import FramesFastFiz, PocketsFastFiz from fastfiz_env.utils.fastfiz import ( create_random_table_state, - get_ball_positions, normalize_ball_positions, ) from fastfiz_env.envs.utils import game_won, possible_shot from stable_baselines3 import PPO -from typing import Optional, Callable +from typing import Optional import argparse from fastfiz_env.wrappers.action import ActionSpaces, FastFizActionWrapper -from fastfiz_env.wrappers.utils import spherical_coordinates def get_play_config() -> dict: diff --git a/src/fastfiz_env/envs/frames_fastfiz.py b/src/fastfiz_env/envs/frames_fastfiz.py index d18f464..6b5a59d 100644 --- a/src/fastfiz_env/envs/frames_fastfiz.py +++ b/src/fastfiz_env/envs/frames_fastfiz.py @@ -5,10 +5,8 @@ from fastfiz_env.envs.utils import game_won, terminal_state from ..utils.fastfiz import ( - shot_params_from_action, get_ball_positions, create_random_table_state, - get_ball_velocity, normalize_ball_positions, normalize_ball_velocity, is_pocketed_state, diff --git a/src/fastfiz_env/envs/simple_fastfiz.py b/src/fastfiz_env/envs/simple_fastfiz.py index f1105d2..3de9ddf 100644 --- a/src/fastfiz_env/envs/simple_fastfiz.py +++ b/src/fastfiz_env/envs/simple_fastfiz.py @@ -4,7 +4,7 @@ import gymnasium as gym from gymnasium import spaces -from fastfiz_env.utils.fastfiz.fastfiz import num_balls_in_play, num_balls_pocketed +from fastfiz_env.utils.fastfiz.fastfiz import num_balls_pocketed from ..utils.fastfiz import ( create_random_table_state, diff --git a/src/fastfiz_env/envs/testing_fastfiz.py b/src/fastfiz_env/envs/testing_fastfiz.py index 2265180..67e35d5 100644 --- a/src/fastfiz_env/envs/testing_fastfiz.py +++ b/src/fastfiz_env/envs/testing_fastfiz.py @@ -10,8 +10,6 @@ create_random_table_state, get_ball_positions, normalize_ball_positions, - shot_params_from_action, - action_to_shot, shotparams_to_string, ) from ..reward_functions import RewardFunction, DefaultReward diff --git a/src/fastfiz_env/make.py b/src/fastfiz_env/make.py index 0b46333..0cc5260 100644 --- a/src/fastfiz_env/make.py +++ b/src/fastfiz_env/make.py @@ -1,4 +1,3 @@ -from typing import Optional from gymnasium.envs.registration import EnvSpec import gymnasium as gym diff --git a/src/fastfiz_env/reward_functions/binary_reward.py b/src/fastfiz_env/reward_functions/binary_reward.py index 86463fe..c6f127c 100644 --- a/src/fastfiz_env/reward_functions/binary_reward.py +++ b/src/fastfiz_env/reward_functions/binary_reward.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Callable, Optional, Union +from typing import Optional, Union import fastfiz as ff from .reward_function import RewardFunction, Weight import numpy as np diff --git a/src/fastfiz_env/reward_functions/common/constant_reward.py b/src/fastfiz_env/reward_functions/common/constant_reward.py index 0c763f1..11a3ece 100644 --- a/src/fastfiz_env/reward_functions/common/constant_reward.py +++ b/src/fastfiz_env/reward_functions/common/constant_reward.py @@ -1,4 +1,4 @@ -from ..reward_function import RewardFunction, Weight +from ..reward_function import RewardFunction import fastfiz as ff import numpy as np diff --git 
a/src/fastfiz_env/reward_functions/common/weights.py b/src/fastfiz_env/reward_functions/common/weights.py index 6b0622d..0590096 100644 --- a/src/fastfiz_env/reward_functions/common/weights.py +++ b/src/fastfiz_env/reward_functions/common/weights.py @@ -1,4 +1,3 @@ -from ..reward_function import Weight ConstantWeight = 1 diff --git a/src/optimize.py b/src/optimize.py index af61169..3ff5abf 100644 --- a/src/optimize.py +++ b/src/optimize.py @@ -6,14 +6,12 @@ import optuna import time import torch -import torch.nn as nn from fastfiz_env.make import make_callable_wrapped_env from fastfiz_env.reward_functions import DefaultReward, WinningReward, RewardFunction from optuna.pruners import MedianPruner from optuna.samplers import TPESampler from stable_baselines3 import PPO from stable_baselines3.common.callbacks import EvalCallback -from stable_baselines3.common.monitor import Monitor from stable_baselines3.common.env_util import make_vec_env from typing import Any, Dict from hyperparams import params_to_kwargs diff --git a/src/tests/utils/test_reward_functions.py b/src/tests/utils/test_reward_functions.py index 7e215e3..352aafc 100644 --- a/src/tests/utils/test_reward_functions.py +++ b/src/tests/utils/test_reward_functions.py @@ -2,7 +2,6 @@ import fastfiz as ff from fastfiz_env.reward_functions.common import * from fastfiz_env.reward_functions import CombinedReward -from fastfiz_env.reward_functions.reward_function import Weight from fastfiz_env.utils.fastfiz import create_table_state import numpy as np diff --git a/src/train.py b/src/train.py index ee48585..7157dcd 100644 --- a/src/train.py +++ b/src/train.py @@ -85,9 +85,9 @@ def train( tb_log_name=model_name, progress_bar=True, ) - print(f"Training finished.") + print("Training finished.") except KeyboardInterrupt: - print(f"Training interrupted.") + print("Training interrupted.") finally: model.save(model_path) print(f"Model saved: {model_path}") From 109eee58cfc711286e989037a291e3bca9064c3f Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 18:55:38 +0200 Subject: [PATCH 15/44] Fix ruff checks --- src/fastfiz_env/envs/frames_fastfiz.py | 35 +++--- src/fastfiz_env/envs/testing_fastfiz.py | 22 +++- .../reward_functions/default_reward.py | 14 ++- .../reward_functions/winning_reward.py | 13 ++- src/fastfiz_env/utils/fastfiz/fastfiz.py | 71 +++++++++--- src/tests/utils/test_reward_functions.py | 108 ++++++++++++++---- 6 files changed, 197 insertions(+), 66 deletions(-) diff --git a/src/fastfiz_env/envs/frames_fastfiz.py b/src/fastfiz_env/envs/frames_fastfiz.py index 6b5a59d..4f13845 100644 --- a/src/fastfiz_env/envs/frames_fastfiz.py +++ b/src/fastfiz_env/envs/frames_fastfiz.py @@ -26,7 +26,9 @@ class FramesFastFiz(gym.Env): TOTAL_BALLS = 16 # Including the cue ball num_balls = 2 - def __init__(self, reward_function: RewardFunction = DefaultReward, num_balls: int = 16) -> None: + def __init__( + self, reward_function: RewardFunction = DefaultReward, num_balls: int = 16 + ) -> None: super().__init__() if num_balls < 2: warnings.warn( @@ -43,11 +45,15 @@ def __init__(self, reward_function: RewardFunction = DefaultReward, num_balls: i def _max_episode_steps(self): if self.get_wrapper_attr("_time_limit_max_episode_steps") is not None: - self.max_episode_steps = self.get_wrapper_attr("_time_limit_max_episode_steps") + self.max_episode_steps = self.get_wrapper_attr( + "_time_limit_max_episode_steps" + ) print(f"Setting max episode steps to {self.max_episode_steps}") self.reward.max_episode_steps = self.max_episode_steps - def 
reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple[np.ndarray, dict]: + def reset( + self, *, seed: Optional[int] = None, options: Optional[dict] = None + ) -> tuple[np.ndarray, dict]: """ Reset the environment to its initial state. """ @@ -129,18 +135,7 @@ def _observation_space(self): All values are in the range `[0, TABLE_WIDTH]` and `[0, TABLE_LENGTH]`. """ - table = self.table_state.getTable() - lower = np.full((self.TOTAL_BALLS, 4), [-1, -1, -1, 0]) - # upper = np.full( - # (self.TOTAL_BALLS, 4), - # [ - # table.TABLE_WIDTH, - # table.TABLE_LENGTH, - # self.table_state.MAX_VELOCITY * 1.580, - # 1, - # ], - # ) upper = np.full( (self.TOTAL_BALLS, 4), [1, 1, 1, 1], @@ -179,9 +174,14 @@ def _possible_shot(self, shot_params: ff.ShotParams) -> bool: """ Check if the shot is possible. """ - return self.table_state.isPhysicallyPossible(shot_params) == ff.TableState.OK_PRECONDITION + return ( + self.table_state.isPhysicallyPossible(shot_params) + == ff.TableState.OK_PRECONDITION + ) - def _compute_observation(self, prev_table_state: ff.TableState, shot: Optional[ff.Shot]) -> np.ndarray: + def _compute_observation( + self, prev_table_state: ff.TableState, shot: Optional[ff.Shot] + ) -> np.ndarray: return self.compute_observation(prev_table_state, self.table_state, shot) @classmethod @@ -223,7 +223,8 @@ def compute_observation( pocketed = is_pocketed_state(gb.state) frames_seq[frame][gb.number] = [ *normalize_ball_positions((gb.position.x, gb.position.y)), # type: ignore - normalize_ball_velocity(np.hypot(gb.velocity.x, gb.velocity.y)) * 2 - 1, + normalize_ball_velocity(np.hypot(gb.velocity.x, gb.velocity.y)) * 2 + - 1, pocketed, ] return frames_seq diff --git a/src/fastfiz_env/envs/testing_fastfiz.py b/src/fastfiz_env/envs/testing_fastfiz.py index 67e35d5..cb39aac 100644 --- a/src/fastfiz_env/envs/testing_fastfiz.py +++ b/src/fastfiz_env/envs/testing_fastfiz.py @@ -66,10 +66,14 @@ def __init__( def _max_episode_steps(self): if self.get_wrapper_attr("_time_limit_max_episode_steps") is not None: - self.max_episode_steps = self.get_wrapper_attr("_time_limit_max_episode_steps") + self.max_episode_steps = self.get_wrapper_attr( + "_time_limit_max_episode_steps" + ) self.reward.max_episode_steps = self.max_episode_steps - def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple[np.ndarray, dict]: + def reset( + self, *, seed: Optional[int] = None, options: Optional[dict] = None + ) -> tuple[np.ndarray, dict]: super().reset(seed=seed) if self.max_episode_steps is None: @@ -90,7 +94,9 @@ def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) - observation = self._get_observation() info = self._get_info() - self.logger.info("Reset(%s) - initial observation:\n%s", self.n_episodes, observation) + self.logger.info( + "Reset(%s) - initial observation:\n%s", self.n_episodes, observation + ) self.logger.info("Reset(%s) - initial info: %s", self.n_episodes, info) self.n_episodes += 1 @@ -148,7 +154,9 @@ def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, bool, dict] def _get_observation(self): ball_positions = get_ball_positions(self.table_state)[: self.TOTAL_BALLS] # ball_positions = normalize_ball_positions(ball_positions) # Normalize to [0, 1] - ball_positions = normalize_ball_positions(ball_positions) * 2 - 1 # Normalize to [-1, 1] (symmetric) + ball_positions = ( + normalize_ball_positions(ball_positions) * 2 - 1 + ) # Normalize to [-1, 1] (symmetric) observation = np.zeros((self.TOTAL_BALLS, 2), 
dtype=np.float32) for i, ball_pos in enumerate(ball_positions): observation[i] = [*ball_pos] @@ -185,7 +193,6 @@ def _observation_space(self) -> spaces.Box: All values are in the range `[0, TABLE_WIDTH]` and `[0, TABLE_LENGTH]`. """ - table = self.table_state.getTable() lower = np.full((self.TOTAL_BALLS, 2), [-1, -1]) upper = np.full((self.TOTAL_BALLS, 2), [1, 1]) return spaces.Box(low=lower, high=upper, dtype=np.float32) @@ -214,4 +221,7 @@ def _possible_shot(self, shot_params: ff.ShotParams) -> bool: """ Check if the shot is possible. """ - return self.table_state.isPhysicallyPossible(shot_params) == ff.TableState.OK_PRECONDITION + return ( + self.table_state.isPhysicallyPossible(shot_params) + == ff.TableState.OK_PRECONDITION + ) diff --git a/src/fastfiz_env/reward_functions/default_reward.py b/src/fastfiz_env/reward_functions/default_reward.py index 5c1d647..fc0ce87 100644 --- a/src/fastfiz_env/reward_functions/default_reward.py +++ b/src/fastfiz_env/reward_functions/default_reward.py @@ -1,5 +1,17 @@ from .combined_reward import CombinedReward -from .common import * +from .common import ( + ConstantReward, + StepPocketedReward, + GameWonReward, + CueBallPocketedReward, + ConstantWeightBalls, + NegativeConstantWeightMaxSteps, + ConstantWeight, + NegativeConstantWeight, + ExponentialVelocityReward, + BallsNotMovedReward, +) + rewards = [ GameWonReward(ConstantWeight), diff --git a/src/fastfiz_env/reward_functions/winning_reward.py b/src/fastfiz_env/reward_functions/winning_reward.py index 235e93d..34994c1 100644 --- a/src/fastfiz_env/reward_functions/winning_reward.py +++ b/src/fastfiz_env/reward_functions/winning_reward.py @@ -1,5 +1,16 @@ from .combined_reward import CombinedReward -from .common import * +from .common import ( + ConstantReward, + StepPocketedReward, + GameWonReward, + CueBallPocketedReward, + ConstantWeightBalls, + NegativeConstantWeightMaxSteps, + ConstantWeight, + NegativeConstantWeight, + ExponentialVelocityReward, + StepNoBallsPocketedReward, +) rewards = [ GameWonReward(ConstantWeight), diff --git a/src/fastfiz_env/utils/fastfiz/fastfiz.py b/src/fastfiz_env/utils/fastfiz/fastfiz.py index d4c936c..78727f6 100644 --- a/src/fastfiz_env/utils/fastfiz/fastfiz.py +++ b/src/fastfiz_env/utils/fastfiz/fastfiz.py @@ -44,7 +44,13 @@ def num_balls_in_play(table_state: ff.TableState) -> int: Returns: int: The number of balls in play. """ - return len([i for i in range(table_state.getNumBalls()) if table_state.getBall(i).isInPlay()]) + return len( + [ + i + for i in range(table_state.getNumBalls()) + if table_state.getBall(i).isInPlay() + ] + ) def num_balls_pocketed( @@ -65,10 +71,14 @@ def num_balls_pocketed( int: The number of balls pocketed. """ stop = table_state.getNumBalls() if range_stop is None else range_stop - return len([i for i in range(range_start, stop) if table_state.getBall(i).isPocketed()]) + return len( + [i for i in range(range_start, stop) if table_state.getBall(i).isPocketed()] + ) -def any_ball_has_moved(prev_ball_positions: np.ndarray, ball_positions: np.ndarray) -> bool: +def any_ball_has_moved( + prev_ball_positions: np.ndarray, ball_positions: np.ndarray +) -> bool: """ Check if any ball has moved by comparing the previous ball positions with the current ball positions. 
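[Editorial note: the hunk above only reflows the signature of `any_ball_has_moved`; its body is not part of this patch. A minimal, hypothetical sketch of the comparison the docstring describes, assuming both arguments are NumPy arrays of (x, y) positions, could look like the following. It illustrates the documented behaviour only and is not the repository's actual implementation.

    import numpy as np

    def any_ball_has_moved(prev_ball_positions: np.ndarray, ball_positions: np.ndarray) -> bool:
        # A ball counts as moved if any coordinate differs beyond floating-point tolerance.
        return not np.allclose(prev_ball_positions, ball_positions)
]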
@@ -166,7 +176,9 @@ def distance_to_closest_pocket(ball_position: np.ndarray, pockets: np.ndarray) - return np.min(distance_to_pockets(ball_position, pockets)) -def distances_to_closest_pocket(ball_positions: np.ndarray, pockets: np.ndarray) -> np.ndarray: +def distances_to_closest_pocket( + ball_positions: np.ndarray, pockets: np.ndarray +) -> np.ndarray: """ Calculates the distances from each ball position to the closest pocket. @@ -177,7 +189,12 @@ def distances_to_closest_pocket(ball_positions: np.ndarray, pockets: np.ndarray) Returns: np.ndarray: An array of distances from each ball position to the closest pocket. """ - return np.array([distance_to_closest_pocket(ball_position, pockets) for ball_position in ball_positions]) + return np.array( + [ + distance_to_closest_pocket(ball_position, pockets) + for ball_position in ball_positions + ] + ) def create_table_state(n_balls: int) -> ff.TableState: @@ -206,7 +223,9 @@ def create_table_state(n_balls: int) -> ff.TableState: return table_state -def create_random_table_state(n_balls: int, seed: Optional[int] = None) -> ff.TableState: +def create_random_table_state( + n_balls: int, seed: Optional[int] = None +) -> ff.TableState: """ Creates a random table state with the specified number of balls. @@ -222,7 +241,9 @@ def create_random_table_state(n_balls: int, seed: Optional[int] = None) -> ff.Ta return table_state -def randomize_table_state(table_state: ff.TableState, seed: Optional[int] = None) -> None: +def randomize_table_state( + table_state: ff.TableState, seed: Optional[int] = None +) -> ff.TableState: """ Randomizes the positions of the balls on the pool table within the given table state. @@ -276,7 +297,9 @@ def randomize_table_state(table_state: ff.TableState, seed: Optional[int] = None return table_state -def map_action_to_shot_params(table_state: ff.TableState, action: np.ndarray) -> np.ndarray: +def map_action_to_shot_params( + table_state: ff.TableState, action: np.ndarray +) -> np.ndarray: """ Maps the given action values to the corresponding shot parameters within the specified ranges. @@ -288,15 +311,19 @@ def map_action_to_shot_params(table_state: ff.TableState, action: np.ndarray) -> np.ndarray: The mapped shot parameters. """ - a = np.interp(action[0], [0, 0], [0, 0]) - b = np.interp(action[1], [0, 0], [0, 0]) - theta = np.interp(action[2], [-1, 1], [table_state.MIN_THETA, table_state.MAX_THETA - 0.001]) + # a = np.interp(action[0], [0, 0], [0, 0]) + # b = np.interp(action[1], [0, 0], [0, 0]) + theta = np.interp( + action[2], [-1, 1], [table_state.MIN_THETA, table_state.MAX_THETA - 0.001] + ) phi = np.interp(action[3], [-1, 1], [0, 360]) v = np.interp(action[4], [-1, 1], [0, table_state.MAX_VELOCITY - 0.001]) return np.array([0, 0, theta, phi, v], dtype=np.float64) -def shot_params_from_action(table_state: ff.TableState, action: np.ndarray) -> ff.ShotParams: +def shot_params_from_action( + table_state: ff.TableState, action: np.ndarray +) -> ff.ShotParams: """ Converts an action into shot parameters. 
@@ -322,11 +349,21 @@ def action_to_shot(action: np.ndarray, action_space: spaces.Box) -> ff.ShotParam MAX_THETA = ff.TableState.MAX_THETA MAX_VELOCITY = ff.TableState.MAX_VELOCITY - a = np.interp(action[0], [action_space.low[0], action_space.high[0]], [MIN_OFFSET, MAX_OFFSET]) - b = np.interp(action[1], [action_space.low[1], action_space.high[1]], [MIN_OFFSET, MAX_OFFSET]) - theta = np.interp(action[2], [action_space.low[2], action_space.high[2]], [MIN_THETA, MAX_THETA]) - phi = np.interp(action[3], [action_space.low[3], action_space.high[3]], [MIN_PHI, MAX_PHI]) - velocity = np.interp(action[4], [action_space.low[4], action_space.high[4]], [0, MAX_VELOCITY]) + a = np.interp( + action[0], [action_space.low[0], action_space.high[0]], [MIN_OFFSET, MAX_OFFSET] + ) + b = np.interp( + action[1], [action_space.low[1], action_space.high[1]], [MIN_OFFSET, MAX_OFFSET] + ) + theta = np.interp( + action[2], [action_space.low[2], action_space.high[2]], [MIN_THETA, MAX_THETA] + ) + phi = np.interp( + action[3], [action_space.low[3], action_space.high[3]], [MIN_PHI, MAX_PHI] + ) + velocity = np.interp( + action[4], [action_space.low[4], action_space.high[4]], [0, MAX_VELOCITY] + ) # print(f"a: {a}, b: {b}, theta: {theta}, phi: {phi}, velocity: {velocity}") diff --git a/src/tests/utils/test_reward_functions.py b/src/tests/utils/test_reward_functions.py index 352aafc..5caf0ff 100644 --- a/src/tests/utils/test_reward_functions.py +++ b/src/tests/utils/test_reward_functions.py @@ -1,6 +1,20 @@ import unittest import fastfiz as ff -from fastfiz_env.reward_functions.common import * +from fastfiz_env.reward_functions.common import ( + ConstantReward, + StepPocketedReward, + GameWonReward, + CueBallPocketedReward, + CueBallNotMovedReward, + ImpossibleShotReward, + DeltaBestTotalDistanceReward, + TotalDistanceReward, + ConstantWeightBalls, + ConstantWeightMaxSteps, + NegativeConstantWeightMaxSteps, + ConstantWeightNumBalls, + ConstantWeightCurrentStep, +) from fastfiz_env.reward_functions import CombinedReward from fastfiz_env.utils.fastfiz import create_table_state import numpy as np @@ -13,7 +27,9 @@ def weight_fn(num_balls: int, current_step: int, max_steps: int | None) -> float class TestRewardFunctions(unittest.TestCase): - possible_shot_action = np.array([0, 0, ff.TableState.MAX_THETA - 0.001, 0, 0], dtype=np.float64) + possible_shot_action = np.array( + [0, 0, ff.TableState.MAX_THETA - 0.001, 0, 0], dtype=np.float64 + ) impossible_shot_action = np.array([0, 0, 0, 0, 0], dtype=np.float64) empty_action = np.array([], dtype=np.float64) @@ -24,8 +40,12 @@ def test_step_pocketed_reward(self): reward = StepPocketedReward() reward.reset(table_state) - self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 0) - self.assertEqual(reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1) + self.assertEqual( + reward.get_reward(table_state, table_state, self.empty_action), 0 + ) + self.assertEqual( + reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1 + ) def test_game_won_reward(self): table_state = create_table_state(3) @@ -38,8 +58,12 @@ def test_game_won_reward(self): reward = GameWonReward() reward.reset(table_state) - self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 0) - self.assertEqual(reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1) + self.assertEqual( + reward.get_reward(table_state, table_state, self.empty_action), 0 + ) + self.assertEqual( + reward.get_reward(table_state, 
table_state_pocketed, self.empty_action), 1 + ) def test_constant_reward(self): table_state = create_table_state(2) @@ -47,8 +71,12 @@ def test_constant_reward(self): reward = ConstantReward(weight=weight_fn, max_episode_steps=10) reward.reset(table_state) - self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 2 + 1 + 10) - self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 2 + 2 + 10) + self.assertEqual( + reward.get_reward(table_state, table_state, self.empty_action), 2 + 1 + 10 + ) + self.assertEqual( + reward.get_reward(table_state, table_state, self.empty_action), 2 + 2 + 10 + ) def test_cue_ball_pocketed_reward(self): table_state = create_table_state(2) @@ -58,8 +86,12 @@ def test_cue_ball_pocketed_reward(self): reward = CueBallPocketedReward() reward.reset(table_state) - self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 0) - self.assertEqual(reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1) + self.assertEqual( + reward.get_reward(table_state, table_state, self.empty_action), 0 + ) + self.assertEqual( + reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1 + ) def test_cue_ball_not_moved_reward(self): table_state = create_table_state(2) @@ -69,17 +101,27 @@ def test_cue_ball_not_moved_reward(self): reward = CueBallNotMovedReward() reward.reset(table_state) - self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 1) - self.assertEqual(reward.get_reward(table_state, table_state_moved, self.empty_action), 0) + self.assertEqual( + reward.get_reward(table_state, table_state, self.empty_action), 1 + ) + self.assertEqual( + reward.get_reward(table_state, table_state_moved, self.empty_action), 0 + ) def test_impossible_shot_reward(self): table_state = create_table_state(2) reward = ImpossibleShotReward() reward.reset(table_state) - self.assertEqual(reward.get_reward(table_state, table_state, self.possible_shot_action), 0) - self.assertEqual(reward.get_reward(table_state, table_state, self.impossible_shot_action), 1) - self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 1) + self.assertEqual( + reward.get_reward(table_state, table_state, self.possible_shot_action), 0 + ) + self.assertEqual( + reward.get_reward(table_state, table_state, self.impossible_shot_action), 1 + ) + self.assertEqual( + reward.get_reward(table_state, table_state, self.empty_action), 1 + ) def test_delta_best_total_distance_reward(self): # Pocket: [0., 1.118] @@ -91,7 +133,9 @@ def test_delta_best_total_distance_reward(self): table_state = create_table_state(2) table_state.setBall(1, ff.Ball.STATIONARY, ff.Point(0.25, 1.118)) - self.assertEqual(reward.get_reward(prev_table_state, prev_table_state, self.empty_action), 0) + self.assertEqual( + reward.get_reward(prev_table_state, prev_table_state, self.empty_action), 0 + ) self.assertEqual( reward.get_reward(prev_table_state, table_state, self.empty_action), 0.25, @@ -141,7 +185,9 @@ def test_combined_reward(self): table_state.setBall(1, ff.Ball.STATIONARY, ff.Point(0.25, 1.118)) self.assertEqual( - reward_function.get_reward(prev_table_state, table_state, self.empty_action), + reward_function.get_reward( + prev_table_state, table_state, self.empty_action + ), 3.3, ) @@ -150,16 +196,22 @@ def test_binary_reward_no_short_circuit(self): ImpossibleShotReward(10, short_circuit=False), ConstantReward(5), ] - reward_function = CombinedReward(reward_functions=rewards_functions, 
short_circuit=True) + reward_function = CombinedReward( + reward_functions=rewards_functions, short_circuit=True + ) table_state = create_table_state(2) reward_function.reset(table_state) self.assertEqual( - reward_function.get_reward(table_state, table_state, self.possible_shot_action), + reward_function.get_reward( + table_state, table_state, self.possible_shot_action + ), 5, ) self.assertEqual( - reward_function.get_reward(table_state, table_state, self.impossible_shot_action), + reward_function.get_reward( + table_state, table_state, self.impossible_shot_action + ), 15, ) @@ -168,22 +220,30 @@ def test_binary_reward_short_circuit(self): ImpossibleShotReward(10, short_circuit=True), ConstantReward(5), ] - reward_function = CombinedReward(reward_functions=rewards_functions, short_circuit=True) + reward_function = CombinedReward( + reward_functions=rewards_functions, short_circuit=True + ) table_state = create_table_state(2) reward_function.reset(table_state) self.assertEqual( - reward_function.get_reward(table_state, table_state, self.possible_shot_action), + reward_function.get_reward( + table_state, table_state, self.possible_shot_action + ), 5, ) self.assertEqual( - reward_function.get_reward(table_state, table_state, self.impossible_shot_action), + reward_function.get_reward( + table_state, table_state, self.impossible_shot_action + ), 10, ) def test_weights(self): table_state = create_table_state(3) - reward = ConstantReward(weight=NegativeConstantWeightMaxSteps, max_episode_steps=10) + reward = ConstantReward( + weight=NegativeConstantWeightMaxSteps, max_episode_steps=10 + ) reward.reset(table_state) self.assertEqual( From 2d3f7fa15d56ea072ae2a4adf00d4e75a7127616 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:43:48 +0200 Subject: [PATCH 16/44] Fix coordinate calculations --- src/fastfiz_env/wrappers/action.py | 191 ++++++++++++----------------- src/fastfiz_env/wrappers/utils.py | 53 ++++---- 2 files changed, 111 insertions(+), 133 deletions(-) diff --git a/src/fastfiz_env/wrappers/action.py b/src/fastfiz_env/wrappers/action.py index bb59399..cd869f0 100644 --- a/src/fastfiz_env/wrappers/action.py +++ b/src/fastfiz_env/wrappers/action.py @@ -1,8 +1,4 @@ -from .utils import ( - vec_to_abs_deg, - vec_length, - spherical_coordinates, -) +from .utils import vec_to_abs_deg, cart2sph, sph2deg from gymnasium import ActionWrapper from gymnasium import spaces import numpy as np @@ -10,60 +6,59 @@ class ActionSpaces(Enum): - NO_OFFSET_5D = (0,) - """No a and b offset, 5D representation of cue stick: - - a: 0 - - b: 0 - - theta: The angle of the shot in the yz-plane (0th and 1st element). - - phi: The angle of the in the xz-plane (2nd and 3rd element). - - velocity: 5th element. + VECTOR_2D = (0,) """ - NO_OFFSET_4D = (1,) - """No a and b offset, 4D representation of cue stick: - - a: 0 - - b: 0 - - theta: The angle of the shot in the yz-plane (0th and 1st element). - - phi: The angle of the in the xz-plane (2nd and 3rd element). - - velocity: Derived from the unit vector of theta + phi. + 2D vector representation of cue stick: + - a: Always 0. + - b: Always 11. + - theta: Always 20. + - phi: Angle between the 2D vector and the x-axis. + - velocity: Magnitude of the 2D vector. """ - - NO_OFFSET_3D = (2,) + VECTOR_3D = (1,) """ - No a and b offset, 3D representation of cue stick: - - a: 0 - - b: 0 - - theta: The angle of the shot in the yz-plane (0th and 1st element). - - phi: The angle of the in the xz-plane (1st and 2nd element). 
- - velocity: Derived from the 3D vector. + 3D vector representation of cue stick: + - a: Always 0. + - b: Always 0. + - theta: Derived from the 3D vector spherical coordinates. + - phi: Derived from the 3D vector spherical coordinates. + - velocity: Magnitude of the 3D vector. """ - - NORM_PARAMS_5D = (3,) + NORM_3D = (2,) """ - Normalized shot paramaters, 5D representation of cue stick: - - a: The offset of the cue ball in the x-coordinate. (Always 0) - - b: The offset of the cue ball in the y-coordinate. (Always 0) - - theta: The angle of the shot in the yz-plane. - - phi: The angle of the in the xz-plane. - - velocity: The velocity of the shot. + Normalized shot parameters, 3D representation of cue stick: + - a: Always 0. + - b: Always 0. + - theta: Normalized angle from `MIN_THETA` to `MAX_THETA`. + - phi: Normalized angle from `MIN_PHI` to `MAX_PHI`. + - velocity: Normalized velocity from `MIN_VELOCITY` to `MAX_VELOCITY`. """ - - NO_OFFSET_NORM_PARAMS_3D = (4,) + NORM_5D = (3,) """ Normalized shot parameters, 5D representation of cue stick: - - a: 0 - - b: 0 - - theta: The angle of the shot in the yz-plane. - - phi: The angle of the in the xz-plane. - - velocity: The velocity of the shot. + - a: Always 0. + - b: Always 0. + - theta: Normalized angle from `MIN_THETA` to `MAX_THETA`. + - phi: Normalized angle from `MIN_PHI` to `MAX_PHI`. + - velocity: Normalized velocity from `MIN_VELOCITY` to `MAX_VELOCITY`. """ - VECTOR_2D = (5,) + OFFSET_NORM_5D = (4,) """ - 2D vector + Normalized shot parameters, 5D representation of cue stick: + - a: Normalized value from `MIN_OFFSET` to `MAX_OFFSET`. + - b: Normalized value from `MIN_OFFSET` to `MAX_OFFSET`. + - theta: Normalized angle from `MIN_THETA` to `MAX_THETA`. + - phi: Normalized angle from `MIN_PHI` to `MAX_PHI`. + - velocity: Normalized velocity from `MIN_VELOCITY` to `MAX_VELOCITY`. """ - - OUTPUT = (6,) + OUTPUT_5D = (5,) """ - Output of FastFizActionWrapper. + 5D representation of cue stick, using original FastFiz shot parameter values: + - a: Offset of the cue ball in the x-coordinate. + - b: Offset of the cue ball in the y-coordinate. + - theta: The vertical angle of the shot. + - phi: The horizontal angle of the shot. + - velocity: The power of the shot (in m/s). 
""" @@ -71,39 +66,36 @@ class FastFizActionWrapper(ActionWrapper): MIN_THETA = 0 MAX_THETA = 70 - 0.001 MIN_PHI = 0 - MAX_PHI = 360 + MAX_PHI = 360 - 0.001 MIN_VELOCITY = 0 - MAX_VELOCITY = 10 + MAX_VELOCITY = 10 - 0.001 + MIN_OFFSET = -15 + MAX_OFFSET = 15 SPACES = { - "NO_OFFSET_3D": spaces.Box( - low=np.array([-1, -1, -1]), - high=np.array([1, 1, 1]), + "VECTOR_2D": spaces.Box( + low=np.array([-1, -1]), + high=np.array([1, 1]), + shape=(2,), dtype=np.float32, ), - "NO_OFFSET_4D": spaces.Box( - low=np.array([-1, -1, -1, -1]), - high=np.array([1, 1, 1, 1]), + "VECTOR_3D": spaces.Box( + low=np.array([-1, -1, -1]), + high=np.array([1, 1, 1]), dtype=np.float32, ), - "NO_OFFSET_5D": spaces.Box( - low=np.array([-1, -1, -1, -1, -1]), - high=np.array([1, 1, 1, 1, 1]), + "NORM_3D": spaces.Box( + low=np.array([-1, -1, -1]), + high=np.array([1, 1, 1]), dtype=np.float32, ), - "NORM_PARAMS_5D": spaces.Box( + "NORM_5D": spaces.Box( low=np.array([0, 0, -1, -1, -1]), high=np.array([0, 0, 1, 1, 1]), dtype=np.float32, ), - "NO_OFFSET_NORM_PARAMS_3D": spaces.Box( - low=np.array([-1, -1, -1]), - high=np.array([1, 1, 1]), - dtype=np.float32, - ), - "VECTOR_2D": spaces.Box( - low=np.array([-1, -1]), - high=np.array([1, 1]), - shape=(2,), + "OFFSET_NORM_5D": spaces.Box( + low=np.array([-1, -1, -1, -1, -1]), + high=np.array([1, 1, 1, 1, 1]), dtype=np.float32, ), "OUTPUT": spaces.Box( @@ -124,65 +116,42 @@ def __init__( self.action_space = self.SPACES[action_space_id.name] def action(self, action: np.ndarray[float, np.dtype[np.float32]]) -> np.ndarray[float, np.dtype[np.float32]]: - # Offset a and b are always 0 offset_a = 0 offset_b = 0 match self.action_space_id: - case ActionSpaces.NO_OFFSET_4D: - if np.allclose(action, 0): - return np.array([offset_a, offset_b, 0, 0, 0]) - vec_theta = action[:2] - theta = vec_to_abs_deg(vec_theta) - theta = np.interp(theta, (0, 360), (self.MIN_THETA, self.MAX_THETA)) - - vec_phi = action[2:4] - phi = vec_to_abs_deg(vec_phi) - - vec_velocity = vec_length(vec_theta + vec_phi) - velocity = np.interp(vec_velocity, (0, 2), (self.MIN_VELOCITY, self.MAX_VELOCITY)) - case ActionSpaces.VECTOR_2D: - if np.allclose(action, 0): - return np.array([offset_a, offset_b, 0, 0, 0]) theta = 20 - phi = np.degrees(np.arctan2(action[1], action[0])) % 360 - # phi = np.interp(theta, (0, 360), (self.MIN_PHI, self.MAX_PHI)) + phi = float(np.degrees(np.arctan2(action[1], action[0])) % self.MAX_PHI) offset_b = 11 - velocity = np.hypot(*action) velocity = np.interp( - velocity, + np.hypot(*action), (0, np.sqrt(2)), (self.MIN_VELOCITY, self.MAX_VELOCITY - 5), ) - - case ActionSpaces.NO_OFFSET_3D: - if np.allclose(action, 0): - return np.array([offset_a, offset_b, 0, 0, 0]) - r, theta, phi = spherical_coordinates(action) - theta = np.interp(theta, (0, 360), (self.MIN_THETA, self.MAX_THETA)) - phi = np.interp(phi, (0, 360), (self.MIN_PHI, self.MAX_PHI)) + case ActionSpaces.VECTOR_3D: + x, y, z = action + az, el, r = cart2sph(x, y, z) + phi, theta, r = sph2deg(az, el, r) + theta = float(np.interp(theta, (0, np.rad2deg(np.pi)), (self.MIN_THETA, self.MAX_THETA))) + phi = float(np.interp(phi, (0, np.rad2deg(2 * np.pi)), (self.MIN_PHI, self.MAX_PHI))) velocity = np.interp(r, (0, np.sqrt(3)), (self.MIN_VELOCITY, self.MAX_VELOCITY)) - - case ActionSpaces.NO_OFFSET_5D: - if np.allclose(action, 0): - return np.array([offset_a, offset_b, 0, 0, 0]) - vec_theta = action[:2] - theta = vec_to_abs_deg(vec_theta) - theta = np.interp(theta, (0, 360), (self.MIN_THETA, self.MAX_THETA)) - - vec_phi = action[2:4] - phi = 
vec_to_abs_deg(vec_phi) - + case ActionSpaces.NORM_3D: + theta = np.interp(action[0], (-1, 1), (self.MIN_THETA, self.MAX_THETA)) + phi = np.interp(action[1], (-1, 1), (self.MIN_PHI, self.MAX_PHI)) + velocity = np.interp(action[2], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) + case ActionSpaces.NORM_5D: + theta = np.interp(action[2], (-1, 1), (self.MIN_THETA, self.MAX_THETA)) + phi = np.interp(action[3], (-1, 1), (self.MIN_PHI, self.MAX_PHI)) velocity = np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) - case ActionSpaces.NORM_PARAMS_5D: + case ActionSpaces.OFFSET_NORM_5D: + offset_a = np.interp(action[0], (-1, 1), (self.MIN_OFFSET, self.MAX_OFFSET)) + offset_b = np.interp(action[1], (-1, 1), (self.MIN_OFFSET, self.MAX_OFFSET)) theta = np.interp(action[2], (-1, 1), (self.MIN_THETA, self.MAX_THETA)) phi = np.interp(action[3], (-1, 1), (self.MIN_PHI, self.MAX_PHI)) velocity = np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) - case ActionSpaces.NO_OFFSET_NORM_PARAMS_3D: - theta = np.interp(action[0], (-1, 1), (self.MIN_THETA, self.MAX_THETA)) - phi = np.interp(action[1], (-1, 1), (self.MIN_PHI, self.MAX_PHI)) - velocity = np.interp(action[2], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) + + velocity = np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) action = np.array([offset_a, offset_b, theta, phi, velocity]) return action diff --git a/src/fastfiz_env/wrappers/utils.py b/src/fastfiz_env/wrappers/utils.py index 5f68dca..9a89002 100644 --- a/src/fastfiz_env/wrappers/utils.py +++ b/src/fastfiz_env/wrappers/utils.py @@ -13,7 +13,7 @@ def vec_to_deg(vec: np.ndarray) -> float: """ Gets the angle of a vector. """ - return np.rad2deg(np.arctan2(vec[1], vec[0])) + return float(np.rad2deg(np.arctan2(vec[1], vec[0]))) def vec_to_abs_deg(vec: np.ndarray) -> float: @@ -23,42 +23,51 @@ def vec_to_abs_deg(vec: np.ndarray) -> float: return vec_to_deg(vec) % 360 -def vec_length(vec: np.ndarray) -> float: +def vec_mag(vec: np.ndarray) -> float: """ Gets the length of a vector. """ - return np.linalg.norm(vec) + return float(np.linalg.norm(vec)) -def vec_normalize(vec: np.ndarray) -> np.ndarray: +def vec_norm(vec: np.ndarray) -> np.ndarray: """ Gets the unit vector of a vector. """ - return vec / vec_length(vec) + return vec / vec_mag(vec) -def spherical_coordinates(vector: np.ndarray) -> tuple[float, float, float]: +def cart2sph(x: float, y: float, z: float) -> tuple[float, float, float]: """ - Converts a vector to spherical coordinates. + Convert Cartesian coordinates to spherical coordinates. + + Args: + x (float): x-coordinate. + y (float): y-coordinate. + z (float): z-coordinate. Returns: - r: float - The magnitude of the vector. - theta: float - The angle from the z-axis. - phi: float - The angle in the xy-plane. + tuple[float, float, float]: A tuple containing azimuth angle (in degrees), elevation angle (in degrees), and radius. """ + hxy: float = np.hypot(x, y) + r: float = np.hypot(hxy, z) + el: float = np.arctan2(z, hxy) + az: float = np.arctan2(y, x) + return az, el, r - assert len(vector) == 3, "Vector must have excatly 3 components." - Vx, Vy, Vz = vector - - theta = np.arccos(Vz / np.linalg.norm(vector)) - - phi = np.arctan2(Vy, Vx) # Using arctan2 to get correct quadrant - phi = (phi + 2 * np.pi) % (2 * np.pi) - - r = np.linalg.norm(vector) - return r, np.degrees(theta), np.degrees(phi) +def sph2deg(az: float, el: float, r: float) -> tuple[float, float, float]: + """ + Convert spherical coordinates to degrees. 
+ Args: + az (float): Azimuth angle in radians. + el (float): Elevation angle in radians. + r (float): Radius. -def vec_magnitude(vector): - return np.linalg.norm(vector) + Returns: + tuple[float, float, float]: A tuple containing azimuth angle (phi, in degrees), elevation angle (theta, in degrees), and radius. + """ + phi: float = np.rad2deg(az % (2 * np.pi)) + theta: float = np.rad2deg(el % np.pi) + return phi, theta, r From 5744e7438a979f06e2077c5a67112c90c2afe7e9 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:45:25 +0200 Subject: [PATCH 17/44] Remove TestingFastFiz env --- src/fastfiz_env/__init__.py | 7 - src/fastfiz_env/envs/__init__.py | 2 - src/fastfiz_env/envs/testing_fastfiz.py | 227 ------------------------ 3 files changed, 236 deletions(-) delete mode 100644 src/fastfiz_env/envs/testing_fastfiz.py diff --git a/src/fastfiz_env/__init__.py b/src/fastfiz_env/__init__.py index 2be325b..5f949cf 100644 --- a/src/fastfiz_env/__init__.py +++ b/src/fastfiz_env/__init__.py @@ -3,7 +3,6 @@ Avaliable environments: - `SimpleFastFiz-v0`: Observes the position of the balls. - - `TestingFastFiz-v0`: Observes the position of the balls. Used for testing purposes with options e.g. seed, logging, action_space_id. - `FramesFastFiz-v0`: Observes the position of the balls and the frames of the simulation. - `PocketsFastFiz-v0`: Observes the position of the balls and in play state. Pocketed balls position always corresponds to given pocket center. @@ -57,12 +56,6 @@ ) -register( - id="TestingFastFiz-v0", - entry_point="fastfiz_env.envs:TestingFastFiz", - additional_wrappers=(wrappers.TimeLimitInjectionWrapper.wrapper_spec(),), -) - register( id="FramesFastFiz-v0", entry_point="fastfiz_env.envs:FramesFastFiz", diff --git a/src/fastfiz_env/envs/__init__.py b/src/fastfiz_env/envs/__init__.py index b183270..f756e1e 100644 --- a/src/fastfiz_env/envs/__init__.py +++ b/src/fastfiz_env/envs/__init__.py @@ -4,14 +4,12 @@ from . 
import utils from .simple_fastfiz import SimpleFastFiz -from .testing_fastfiz import TestingFastFiz from .frames_fastfiz import FramesFastFiz from .pockets_fastfiz import PocketsFastFiz __all__ = [ "utils", "SimpleFastFiz", - "TestingFastFiz", "FramesFastFiz", "PocketsFastFiz", ] diff --git a/src/fastfiz_env/envs/testing_fastfiz.py b/src/fastfiz_env/envs/testing_fastfiz.py deleted file mode 100644 index cb39aac..0000000 --- a/src/fastfiz_env/envs/testing_fastfiz.py +++ /dev/null @@ -1,227 +0,0 @@ -import os -import numpy as np -import gymnasium as gym -from gymnasium import spaces -from typing import Optional -from ..wrappers import ActionSpaces, FastFizActionWrapper - -from fastfiz_env.utils.fastfiz.fastfiz import table_state_to_string -from ..utils.fastfiz import ( - create_random_table_state, - get_ball_positions, - normalize_ball_positions, - shotparams_to_string, -) -from ..reward_functions import RewardFunction, DefaultReward -import fastfiz as ff -import logging -import time - - -class TestingFastFiz(gym.Env): - """FastFiz environment for testing.""" - - TOTAL_BALLS = 16 - - def __init__( - self, - reward_function: RewardFunction = DefaultReward, - num_balls: int = 16, - *, - options: Optional[dict] = None, - ) -> None: - super().__init__() - self.options = options - self.num_balls = num_balls - self.table_state = create_random_table_state(self.num_balls) - self.observation_space = self._observation_space() - action_space_id = self.options.get("action_space_id", ActionSpaces.NO_OFFSET_3D) - self.action_space = FastFizActionWrapper.get_action_space(action_space_id) - self.max_episode_steps = None - self.reward = reward_function - - # Logging - self.logger = logging.getLogger(__name__) - logs_dir = self.options.get("logs_dir", "") - os.makedirs(logs_dir, exist_ok=True) - logging.basicConfig( - filename=os.path.join(logs_dir, f"{time.strftime('%m-%d_%H:%M:%S')}.log"), - filemode="a", - format="%(asctime)s - %(levelname)s - %(message)s", - datefmt="%H:%M:%S", - level=self.options.get("log_level", logging.INFO), - ) - - self.n_episodes = 0 - self.n_step = 0 - - self.logger.info( - "TestFastFiz initialized with:\n- balls: %s\n- rewards: %s\n- options: %s\n- action space: %s\n- observation space: %s", - self.num_balls, - self.reward, - self.options, - self.action_space, - self.observation_space, - ) - - def _max_episode_steps(self): - if self.get_wrapper_attr("_time_limit_max_episode_steps") is not None: - self.max_episode_steps = self.get_wrapper_attr( - "_time_limit_max_episode_steps" - ) - self.reward.max_episode_steps = self.max_episode_steps - - def reset( - self, *, seed: Optional[int] = None, options: Optional[dict] = None - ) -> tuple[np.ndarray, dict]: - super().reset(seed=seed) - - if self.max_episode_steps is None: - self._max_episode_steps() - - seed = self.options.get("seed", None) - self.logger.info("Reset(%s) - total n_steps: %s", self.n_episodes, self.n_step) - self.logger.info("Reset(%s) - table state seed: %s", self.n_episodes, seed) - self.table_state = create_random_table_state(self.num_balls, seed=seed) - self.reward.reset(self.table_state) - - self.logger.info( - "Reset(%s) - table state:\n%s", - self.n_episodes, - table_state_to_string(self.table_state), - ) - - observation = self._get_observation() - info = self._get_info() - - self.logger.info( - "Reset(%s) - initial observation:\n%s", self.n_episodes, observation - ) - self.logger.info("Reset(%s) - initial info: %s", self.n_episodes, info) - - self.n_episodes += 1 - self.n_step = 0 - - return observation, info - - 
def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, bool, dict]: - """ - Execute an action in the environment. - """ - - prev_table_state = ff.TableState(self.table_state) - # action_space = spaces.Box( - # low=np.array([0, 0, -1, -1, -1]), - # high=np.array([0, 0, 1, 1, 1]), - # dtype=np.float32, - # ) - # shot_params = action_to_shot([0, 0, *action], action_space) - - shot_params = ff.ShotParams(*action) - - self.logger.info( - "Step(%s) - Action:\n- action: %s\n- shot params: %s", - self.n_step, - action, - shotparams_to_string(shot_params), - ) - - impossible_shot = not self._possible_shot(shot_params) - - self.logger.info("Step(%s) - impossible shot: %s", self.n_step, impossible_shot) - - if not impossible_shot: - self.table_state.executeShot(shot_params) - - observation = self._get_observation() - - reward = self.reward.get_reward(prev_table_state, self.table_state, action) - - terminated = self._is_terminal_state() - truncated = False - info = self._get_info() - - self.logger.debug("Step(%s) - observation:\n%s", self.n_step, observation) - self.logger.info("Step(%s) - reward: %s", self.n_step, reward) - self.logger.info("Step(%s) - terminated: %s", self.n_step, terminated) - self.logger.debug("Step(%s) - truncated: %s", self.n_step, truncated) - self.logger.info("Step(%s) - info: %s", self.n_step, info) - - self.n_step += 1 - - return observation, reward, terminated, truncated, info - - def _get_observation(self): - ball_positions = get_ball_positions(self.table_state)[: self.TOTAL_BALLS] - # ball_positions = normalize_ball_positions(ball_positions) # Normalize to [0, 1] - ball_positions = ( - normalize_ball_positions(ball_positions) * 2 - 1 - ) # Normalize to [-1, 1] (symmetric) - observation = np.zeros((self.TOTAL_BALLS, 2), dtype=np.float32) - for i, ball_pos in enumerate(ball_positions): - observation[i] = [*ball_pos] - - return np.array(observation) - - def _get_info(self): - return { - "is_success": self._game_won(), - } - - def _is_terminal_state(self) -> bool: - if self.table_state.getBall(0).isPocketed(): - return True - - return self._game_won() - - def _game_won(self) -> bool: - if self.table_state.getBall(0).isPocketed(): - return False - - for i in range(1, self.num_balls): - if not self.table_state.getBall(i).isPocketed(): - return False - return True - - def _observation_space(self) -> spaces.Box: - """ - Get the observation space of the environment. - - The observation space is a 16-dimensional box with the position of each ball: - - x: The x-coordinate of the ball. - - y: The y-coordinate of the ball. - - All values are in the range `[0, TABLE_WIDTH]` and `[0, TABLE_LENGTH]`. - """ - lower = np.full((self.TOTAL_BALLS, 2), [-1, -1]) - upper = np.full((self.TOTAL_BALLS, 2), [1, 1]) - return spaces.Box(low=lower, high=upper, dtype=np.float32) - - def _action_space(self) -> spaces.Box: - """ - Get the action space of the environment. - - The action space is a 5-dimensional box: - - a-offset: The offset of the cue ball in the x-coordinate. - - b-offset: The offset of the cue ball in the y-coordinate. - - theta: The angle of the shot in the yz-plane. - - phi: The angle of the shot. - - velocity: The power of the shot. - - All values are in the range `[0, 1]`. - """ - # return spaces.Box( - # low=np.array([-1, -1, -1]), - # high=np.array([1, 1, 1]), - # dtype=np.float32, - # ) - return FastFizActionWrapper.get_action_space(ActionSpaces.NO_OFFSET_4D) - - def _possible_shot(self, shot_params: ff.ShotParams) -> bool: - """ - Check if the shot is possible. 
- """ - return ( - self.table_state.isPhysicallyPossible(shot_params) - == ff.TableState.OK_PRECONDITION - ) From dba9b07df58e239e6f23f3037ff19c65f33b53cd Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:45:59 +0200 Subject: [PATCH 18/44] Rename action space --- src/eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eval.py b/src/eval.py index 7f64133..342b78b 100644 --- a/src/eval.py +++ b/src/eval.py @@ -111,7 +111,7 @@ def main() -> None: # env_vec = fastfiz_env.make("SimpleFastFiz-v0", reward_function=DefaultReward) # env_vec = FastFizActionWrapper(env_vec, ActionSpaces.NO_OFFSET_3D) - env = FastFizActionWrapper(PocketsFastFiz, ActionSpaces.NO_OFFSET_3D) + env = FastFizActionWrapper(PocketsFastFiz, ActionSpaces.VECTOR_3D) agent = Agent(model, env) play(agent.decide_shot, balls=2, episodes=100) From d766fba73721f36c63c6cf718614872d094948a8 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:46:40 +0200 Subject: [PATCH 19/44] Rename action space --- src/fastfiz_env/make.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fastfiz_env/make.py b/src/fastfiz_env/make.py index 0cc5260..7fa8b2a 100644 --- a/src/fastfiz_env/make.py +++ b/src/fastfiz_env/make.py @@ -56,7 +56,7 @@ def make_wrapped_env( disable_env_checker=False, **kwargs, ) - env = FastFizActionWrapper(env, action_space_id=ActionSpaces.NO_OFFSET_3D) + env = FastFizActionWrapper(env, action_space_id=ActionSpaces.VECTOR_3D) return env From da7b09432f6e7ec46cb045e86bf030b8e70410db Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:47:39 +0200 Subject: [PATCH 20/44] Fix type hints --- src/fastfiz_env/reward_functions/combined_reward.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/fastfiz_env/reward_functions/combined_reward.py b/src/fastfiz_env/reward_functions/combined_reward.py index eb4bb4b..1bf24ca 100644 --- a/src/fastfiz_env/reward_functions/combined_reward.py +++ b/src/fastfiz_env/reward_functions/combined_reward.py @@ -1,3 +1,4 @@ +from typing import Optional from .reward_function import RewardFunction, Weight from .binary_reward import BinaryReward import fastfiz as ff @@ -12,7 +13,7 @@ class CombinedReward(RewardFunction): def __init__( self, weight: Weight = 1, - max_episode_steps: int = None, + max_episode_steps: Optional[int] = None, *, reward_functions: list[RewardFunction], short_circuit: bool = False, @@ -37,11 +38,11 @@ def __init__( self.max_episode_steps = max_episode_steps @property - def max_episode_steps(self) -> int: + def max_episode_steps(self) -> Optional[int]: return self._max_episode_steps @max_episode_steps.setter - def max_episode_steps(self, value: int) -> None: + def max_episode_steps(self, value: Optional[int]) -> None: self._max_episode_steps = value for reward in self.reward_functions: reward.max_episode_steps = value From d09391fa629730a09a92b2d68360205a56bd3fd0 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:50:42 +0200 Subject: [PATCH 21/44] Fix type hints --- src/fastfiz_env/wrappers/action.py | 44 +++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/fastfiz_env/wrappers/action.py b/src/fastfiz_env/wrappers/action.py index cd869f0..7e4dedc 100644 --- a/src/fastfiz_env/wrappers/action.py +++ b/src/fastfiz_env/wrappers/action.py @@ -116,18 +116,20 @@ def __init__( self.action_space = self.SPACES[action_space_id.name] def action(self, action: np.ndarray[float, np.dtype[np.float32]]) -> 
np.ndarray[float, np.dtype[np.float32]]: - offset_a = 0 - offset_b = 0 + offset_a = 0.0 + offset_b = 0.0 match self.action_space_id: case ActionSpaces.VECTOR_2D: - theta = 20 + theta = 20.0 phi = float(np.degrees(np.arctan2(action[1], action[0])) % self.MAX_PHI) - offset_b = 11 - velocity = np.interp( - np.hypot(*action), - (0, np.sqrt(2)), - (self.MIN_VELOCITY, self.MAX_VELOCITY - 5), + offset_b = 11.0 + velocity = float( + np.interp( + np.hypot(*action), + (0, np.sqrt(2)), + (self.MIN_VELOCITY, self.MAX_VELOCITY - 5), + ) ) case ActionSpaces.VECTOR_3D: x, y, z = action @@ -135,23 +137,21 @@ def action(self, action: np.ndarray[float, np.dtype[np.float32]]) -> np.ndarray[ phi, theta, r = sph2deg(az, el, r) theta = float(np.interp(theta, (0, np.rad2deg(np.pi)), (self.MIN_THETA, self.MAX_THETA))) phi = float(np.interp(phi, (0, np.rad2deg(2 * np.pi)), (self.MIN_PHI, self.MAX_PHI))) - velocity = np.interp(r, (0, np.sqrt(3)), (self.MIN_VELOCITY, self.MAX_VELOCITY)) + velocity = float(np.interp(r, (0, np.sqrt(3)), (self.MIN_VELOCITY, self.MAX_VELOCITY))) case ActionSpaces.NORM_3D: - theta = np.interp(action[0], (-1, 1), (self.MIN_THETA, self.MAX_THETA)) - phi = np.interp(action[1], (-1, 1), (self.MIN_PHI, self.MAX_PHI)) - velocity = np.interp(action[2], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) + theta = float(np.interp(action[0], (-1, 1), (self.MIN_THETA, self.MAX_THETA))) + phi = float(np.interp(action[1], (-1, 1), (self.MIN_PHI, self.MAX_PHI))) + velocity = float(np.interp(action[2], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY))) case ActionSpaces.NORM_5D: - theta = np.interp(action[2], (-1, 1), (self.MIN_THETA, self.MAX_THETA)) - phi = np.interp(action[3], (-1, 1), (self.MIN_PHI, self.MAX_PHI)) - velocity = np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) + theta = float(np.interp(action[2], (-1, 1), (self.MIN_THETA, self.MAX_THETA))) + phi = float(np.interp(action[3], (-1, 1), (self.MIN_PHI, self.MAX_PHI))) + velocity = float(np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY))) case ActionSpaces.OFFSET_NORM_5D: - offset_a = np.interp(action[0], (-1, 1), (self.MIN_OFFSET, self.MAX_OFFSET)) - offset_b = np.interp(action[1], (-1, 1), (self.MIN_OFFSET, self.MAX_OFFSET)) - theta = np.interp(action[2], (-1, 1), (self.MIN_THETA, self.MAX_THETA)) - phi = np.interp(action[3], (-1, 1), (self.MIN_PHI, self.MAX_PHI)) - velocity = np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) - - velocity = np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY)) + offset_a = float(np.interp(action[0], (-1, 1), (self.MIN_OFFSET, self.MAX_OFFSET))) + offset_b = float(np.interp(action[1], (-1, 1), (self.MIN_OFFSET, self.MAX_OFFSET))) + theta = float(np.interp(action[2], (-1, 1), (self.MIN_THETA, self.MAX_THETA))) + phi = float(np.interp(action[3], (-1, 1), (self.MIN_PHI, self.MAX_PHI))) + velocity = float(np.interp(action[4], (-1, 1), (self.MIN_VELOCITY, self.MAX_VELOCITY))) action = np.array([offset_a, offset_b, theta, phi, velocity]) return action From e4239638e0bee9881fded936b08f2d955ce45164 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:57:06 +0200 Subject: [PATCH 22/44] Check binary reward function instance --- src/fastfiz_env/reward_functions/combined_reward.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fastfiz_env/reward_functions/combined_reward.py b/src/fastfiz_env/reward_functions/combined_reward.py index 1bf24ca..29e7140 100644 --- 
a/src/fastfiz_env/reward_functions/combined_reward.py +++ b/src/fastfiz_env/reward_functions/combined_reward.py @@ -70,12 +70,12 @@ def reward( float: The combined, weighted reward. """ - total_reward = 0 + total_reward = 0.0 for reward_function in self.reward_functions: reward = reward_function.get_reward(prev_table_state, table_state, action) total_reward += reward - if issubclass(reward_function.__class__, BinaryReward): + if isinstance(reward_function, BinaryReward): if reward == 1 * reward_function.weight() and self.short_circuit and reward_function.short_circuit: return total_reward From 8971d0a6a6dfcd01637206047662b3dcf40ba8fb Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:57:24 +0200 Subject: [PATCH 23/44] Remove type hint --- src/fastfiz_env/utils/fastfiz/renderer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fastfiz_env/utils/fastfiz/renderer.py b/src/fastfiz_env/utils/fastfiz/renderer.py index 1a44b29..9a64b3e 100644 --- a/src/fastfiz_env/utils/fastfiz/renderer.py +++ b/src/fastfiz_env/utils/fastfiz/renderer.py @@ -83,7 +83,6 @@ def _get_relevant_ball_states_from_shot(self, shot: ff.Shot): relevant_states: list[_BallState] = [] for event in shot.getEventList(): - event: ff.Event if event.getBall1() == self.number: new_ball_event = _BallState.from_event_and_ball(event, event.getBall1Data()) relevant_states.append(new_ball_event) From d8d53a560f603124d7392ca6e8768edc93c537b9 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:57:36 +0200 Subject: [PATCH 24/44] Fix type hints --- src/fastfiz_env/utils/fastfiz/fastfiz.py | 76 ++++++------------------ 1 file changed, 18 insertions(+), 58 deletions(-) diff --git a/src/fastfiz_env/utils/fastfiz/fastfiz.py b/src/fastfiz_env/utils/fastfiz/fastfiz.py index 78727f6..8c65aeb 100644 --- a/src/fastfiz_env/utils/fastfiz/fastfiz.py +++ b/src/fastfiz_env/utils/fastfiz/fastfiz.py @@ -30,8 +30,7 @@ def get_ball_positions(table_state: ff.TableState) -> np.ndarray: for i in range(table_state.getNumBalls()): pos = table_state.getBall(i).getPos() balls.append((pos.x, pos.y)) - balls = np.array(balls) - return balls + return np.array(balls) def num_balls_in_play(table_state: ff.TableState) -> int: @@ -44,13 +43,7 @@ def num_balls_in_play(table_state: ff.TableState) -> int: Returns: int: The number of balls in play. """ - return len( - [ - i - for i in range(table_state.getNumBalls()) - if table_state.getBall(i).isInPlay() - ] - ) + return len([i for i in range(table_state.getNumBalls()) if table_state.getBall(i).isInPlay()]) def num_balls_pocketed( @@ -71,14 +64,10 @@ def num_balls_pocketed( int: The number of balls pocketed. """ stop = table_state.getNumBalls() if range_stop is None else range_stop - return len( - [i for i in range(range_start, stop) if table_state.getBall(i).isPocketed()] - ) + return len([i for i in range(range_start, stop) if table_state.getBall(i).isPocketed()]) -def any_ball_has_moved( - prev_ball_positions: np.ndarray, ball_positions: np.ndarray -) -> bool: +def any_ball_has_moved(prev_ball_positions: np.ndarray, ball_positions: np.ndarray) -> bool: """ Check if any ball has moved by comparing the previous ball positions with the current ball positions. @@ -147,7 +136,7 @@ def distance_to_pocket(ball_position: np.ndarray, pocket: np.ndarray) -> float: Returns: float: The Euclidean distance between the ball position and the pocket. 
""" - return np.linalg.norm(pocket - ball_position) + return float(np.linalg.norm(pocket - ball_position)) def distance_to_pockets(ball_position: np.ndarray, pockets: np.ndarray) -> np.ndarray: @@ -176,9 +165,7 @@ def distance_to_closest_pocket(ball_position: np.ndarray, pockets: np.ndarray) - return np.min(distance_to_pockets(ball_position, pockets)) -def distances_to_closest_pocket( - ball_positions: np.ndarray, pockets: np.ndarray -) -> np.ndarray: +def distances_to_closest_pocket(ball_positions: np.ndarray, pockets: np.ndarray) -> np.ndarray: """ Calculates the distances from each ball position to the closest pocket. @@ -189,12 +176,7 @@ def distances_to_closest_pocket( Returns: np.ndarray: An array of distances from each ball position to the closest pocket. """ - return np.array( - [ - distance_to_closest_pocket(ball_position, pockets) - for ball_position in ball_positions - ] - ) + return np.array([distance_to_closest_pocket(ball_position, pockets) for ball_position in ball_positions]) def create_table_state(n_balls: int) -> ff.TableState: @@ -223,9 +205,7 @@ def create_table_state(n_balls: int) -> ff.TableState: return table_state -def create_random_table_state( - n_balls: int, seed: Optional[int] = None -) -> ff.TableState: +def create_random_table_state(n_balls: int, seed: Optional[int] = None) -> ff.TableState: """ Creates a random table state with the specified number of balls. @@ -241,9 +221,7 @@ def create_random_table_state( return table_state -def randomize_table_state( - table_state: ff.TableState, seed: Optional[int] = None -) -> ff.TableState: +def randomize_table_state(table_state: ff.TableState, seed: Optional[int] = None) -> ff.TableState: """ Randomizes the positions of the balls on the pool table within the given table state. @@ -297,9 +275,7 @@ def randomize_table_state( return table_state -def map_action_to_shot_params( - table_state: ff.TableState, action: np.ndarray -) -> np.ndarray: +def map_action_to_shot_params(table_state: ff.TableState, action: np.ndarray) -> np.ndarray: """ Maps the given action values to the corresponding shot parameters within the specified ranges. @@ -313,17 +289,13 @@ def map_action_to_shot_params( """ # a = np.interp(action[0], [0, 0], [0, 0]) # b = np.interp(action[1], [0, 0], [0, 0]) - theta = np.interp( - action[2], [-1, 1], [table_state.MIN_THETA, table_state.MAX_THETA - 0.001] - ) + theta = np.interp(action[2], [-1, 1], [table_state.MIN_THETA, table_state.MAX_THETA - 0.001]) phi = np.interp(action[3], [-1, 1], [0, 360]) v = np.interp(action[4], [-1, 1], [0, table_state.MAX_VELOCITY - 0.001]) return np.array([0, 0, theta, phi, v], dtype=np.float64) -def shot_params_from_action( - table_state: ff.TableState, action: np.ndarray -) -> ff.ShotParams: +def shot_params_from_action(table_state: ff.TableState, action: np.ndarray) -> ff.ShotParams: """ Converts an action into shot parameters. 
@@ -349,21 +321,11 @@ def action_to_shot(action: np.ndarray, action_space: spaces.Box) -> ff.ShotParam MAX_THETA = ff.TableState.MAX_THETA MAX_VELOCITY = ff.TableState.MAX_VELOCITY - a = np.interp( - action[0], [action_space.low[0], action_space.high[0]], [MIN_OFFSET, MAX_OFFSET] - ) - b = np.interp( - action[1], [action_space.low[1], action_space.high[1]], [MIN_OFFSET, MAX_OFFSET] - ) - theta = np.interp( - action[2], [action_space.low[2], action_space.high[2]], [MIN_THETA, MAX_THETA] - ) - phi = np.interp( - action[3], [action_space.low[3], action_space.high[3]], [MIN_PHI, MAX_PHI] - ) - velocity = np.interp( - action[4], [action_space.low[4], action_space.high[4]], [0, MAX_VELOCITY] - ) + a = np.interp(action[0], [action_space.low[0], action_space.high[0]], [MIN_OFFSET, MAX_OFFSET]) + b = np.interp(action[1], [action_space.low[1], action_space.high[1]], [MIN_OFFSET, MAX_OFFSET]) + theta = np.interp(action[2], [action_space.low[2], action_space.high[2]], [MIN_THETA, MAX_THETA]) + phi = np.interp(action[3], [action_space.low[3], action_space.high[3]], [MIN_PHI, MAX_PHI]) + velocity = np.interp(action[4], [action_space.low[4], action_space.high[4]], [0, MAX_VELOCITY]) # print(f"a: {a}, b: {b}, theta: {theta}, phi: {phi}, velocity: {velocity}") @@ -445,9 +407,7 @@ def get_ball_positions_id(table_state: ff.TableState) -> np.ndarray: ball = table_state.getBall(i) pos = ball.getPos() balls.append([ball.getID(), pos.x, pos.y]) - - balls = np.array(balls) - return balls + return np.array(balls) def is_pocketed_state(state: int) -> bool: From 5a4d634655abd6c1d0ca93f238469b7ad5032d10 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:57:46 +0200 Subject: [PATCH 25/44] Fix type hints --- src/fastfiz_env/reward_functions/common/velocity_reward.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/fastfiz_env/reward_functions/common/velocity_reward.py b/src/fastfiz_env/reward_functions/common/velocity_reward.py index ad21006..8ca998a 100644 --- a/src/fastfiz_env/reward_functions/common/velocity_reward.py +++ b/src/fastfiz_env/reward_functions/common/velocity_reward.py @@ -17,5 +17,4 @@ def reward( """ Reward function that gives a reward based on velocity of the action. 
""" - reward = np.interp(action[4], [0, 10], [0, 1]) - return reward + return float(np.interp(action[4], [0, 10], [0, 1])) From 35e5598ced85ff5d3d38f4f4f6b00b5b3137e931 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 22:58:48 +0200 Subject: [PATCH 26/44] Format with Ruff --- src/fastfiz_env/envs/frames_fastfiz.py | 24 +- .../reward_functions/common/weights.py | 2 - src/tes.ipynb | 260 ++++++++++++++++++ src/tests/utils/test_reward_functions.py | 92 ++----- 4 files changed, 289 insertions(+), 89 deletions(-) create mode 100644 src/tes.ipynb diff --git a/src/fastfiz_env/envs/frames_fastfiz.py b/src/fastfiz_env/envs/frames_fastfiz.py index 4f13845..b869cd4 100644 --- a/src/fastfiz_env/envs/frames_fastfiz.py +++ b/src/fastfiz_env/envs/frames_fastfiz.py @@ -26,9 +26,7 @@ class FramesFastFiz(gym.Env): TOTAL_BALLS = 16 # Including the cue ball num_balls = 2 - def __init__( - self, reward_function: RewardFunction = DefaultReward, num_balls: int = 16 - ) -> None: + def __init__(self, reward_function: RewardFunction = DefaultReward, num_balls: int = 16) -> None: super().__init__() if num_balls < 2: warnings.warn( @@ -45,15 +43,11 @@ def __init__( def _max_episode_steps(self): if self.get_wrapper_attr("_time_limit_max_episode_steps") is not None: - self.max_episode_steps = self.get_wrapper_attr( - "_time_limit_max_episode_steps" - ) + self.max_episode_steps = self.get_wrapper_attr("_time_limit_max_episode_steps") print(f"Setting max episode steps to {self.max_episode_steps}") self.reward.max_episode_steps = self.max_episode_steps - def reset( - self, *, seed: Optional[int] = None, options: Optional[dict] = None - ) -> tuple[np.ndarray, dict]: + def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple[np.ndarray, dict]: """ Reset the environment to its initial state. """ @@ -174,14 +168,9 @@ def _possible_shot(self, shot_params: ff.ShotParams) -> bool: """ Check if the shot is possible. 
""" - return ( - self.table_state.isPhysicallyPossible(shot_params) - == ff.TableState.OK_PRECONDITION - ) + return self.table_state.isPhysicallyPossible(shot_params) == ff.TableState.OK_PRECONDITION - def _compute_observation( - self, prev_table_state: ff.TableState, shot: Optional[ff.Shot] - ) -> np.ndarray: + def _compute_observation(self, prev_table_state: ff.TableState, shot: Optional[ff.Shot]) -> np.ndarray: return self.compute_observation(prev_table_state, self.table_state, shot) @classmethod @@ -223,8 +212,7 @@ def compute_observation( pocketed = is_pocketed_state(gb.state) frames_seq[frame][gb.number] = [ *normalize_ball_positions((gb.position.x, gb.position.y)), # type: ignore - normalize_ball_velocity(np.hypot(gb.velocity.x, gb.velocity.y)) * 2 - - 1, + normalize_ball_velocity(np.hypot(gb.velocity.x, gb.velocity.y)) * 2 - 1, pocketed, ] return frames_seq diff --git a/src/fastfiz_env/reward_functions/common/weights.py b/src/fastfiz_env/reward_functions/common/weights.py index 0590096..a2bd59d 100644 --- a/src/fastfiz_env/reward_functions/common/weights.py +++ b/src/fastfiz_env/reward_functions/common/weights.py @@ -1,5 +1,3 @@ - - ConstantWeight = 1 NegativeConstantWeight = -ConstantWeight diff --git a/src/tes.ipynb b/src/tes.ipynb new file mode 100644 index 0000000..507ccab --- /dev/null +++ b/src/tes.ipynb @@ -0,0 +1,260 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "def vec_length(vec: np.ndarray) -> float:\n", + " \"\"\"\n", + " Gets the length of a vector.\n", + " \"\"\"\n", + " return float(np.linalg.norm(vec))" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "def spherical_coordinates(vector: np.ndarray) -> tuple[float, float, float]:\n", + " \"\"\"\n", + " Converts a vector to spherical coordinates.\n", + "\n", + " Returns:\n", + " r: float - The magnitude of the vector.\n", + " theta: float - The angle from the z-axis.\n", + " phi: float - The angle in the xy-plane.\n", + " \"\"\"\n", + " assert len(vector) == 3, \"Vector must have excatly 3 components.\"\n", + " Vx, Vy, Vz = vector\n", + " r = vec_length(vector)\n", + " theta = np.rad2deg(np.arccos(Vz / r))\n", + " phi = np.arctan2(Vy, Vx) # Using arctan2 to get correct quadrant\n", + "\n", + " # phi = np.rad2deg((phi + 2 * np.pi) % (2 * np.pi))\n", + "\n", + " return r, theta, np.rad2deg(phi)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def cart2sph(x, y, z):\n", + " hxy = np.hypot(x, y)\n", + " r = np.hypot(hxy, z)\n", + " el = np.arctan2(z, hxy)\n", + " az = np.arctan2(y, x)\n", + " return az, el, r" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [], + "source": [ + "def cart2params(x, y, z):\n", + " az, el, r = cart2sph(x, y, z)\n", + " az = np.rad2deg(az)\n", + " el = np.rad2deg(el)\n", + " return r, el, az" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "def cartesian_to_spherical(x, y, z):\n", + " r = np.sqrt(x**2 + y**2 + z**2)\n", + " theta = np.rad2deg(np.arccos(z / r))\n", + " phi = np.rad2deg(np.arctan2(y, x))\n", + " return r, theta, phi" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "(1.044030650891055, 90.0, 16.69924423399362)\n", + "(1.044030650891055, 90.0, 16.69924423399362)\n", + "(1.044030650891055, 0.0, 16.69924423399362)\n" + ] + } + ], + "source": [ + "vec = np.array([1, 0.30, 0])\n", + "print(spherical_coordinates(vec))\n", + "print(cartesian_to_spherical(*vec))\n", + "print(cart2params(*vec))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1.4142135623730951, 45.00000000000001, 0.0)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spherical_coordinates(vec)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.0, 0.7853981633974483, 1.4142135623730951)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cart2sph(*vec)" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "metadata": {}, + "outputs": [], + "source": [ + "def cart2sph(x: float, y: float, z: float) -> tuple[float, float, float]:\n", + " \"\"\"\n", + " Convert Cartesian coordinates to spherical coordinates.\n", + "\n", + " Args:\n", + " x (float): x-coordinate.\n", + " y (float): y-coordinate.\n", + " z (float): z-coordinate.\n", + "\n", + " Returns:\n", + " tuple[float, float, float]: A tuple containing azimuth angle (in degrees), elevation angle (in degrees), and radius.\n", + " \"\"\"\n", + " hxy: float = np.hypot(x, y)\n", + " r: float = np.hypot(hxy, z)\n", + " el: float = np.arctan2(z, hxy)\n", + " az: float = np.arctan2(y, x)\n", + " return az, el, r\n", + "\n", + "\n", + "def sph2deg(az: float, el: float, r: float) -> tuple[float, float, float]:\n", + " \"\"\"\n", + " Convert spherical coordinates to degrees.\n", + "\n", + " Args:\n", + " az (float): Azimuth angle in radians.\n", + " el (float): Elevation angle in radians.\n", + " r (float): Radius.\n", + "\n", + " Returns:\n", + " tuple[float, float, float]: A tuple containing elevation angle (in degrees), azimuth angle (in degrees), and radius.\n", + " \"\"\"\n", + " theta: float = np.rad2deg(el) % 180\n", + " phi: float = np.rad2deg(az) % 360\n", + " return theta, phi, r" + ] + }, + { + "cell_type": "code", + "execution_count": 161, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(35.264389682754654, 45.0, 346.41016151377545)" + ] + }, + "execution_count": 161, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vec = np.array([200, 200, 200])\n", + "cart2sph(*vec)\n", + "sph2deg(*cart2sph(*vec))" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "180.0" + ] + }, + "execution_count": 123, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.rad2deg(np.pi)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/tests/utils/test_reward_functions.py b/src/tests/utils/test_reward_functions.py index 5caf0ff..dabe1df 100644 --- 
a/src/tests/utils/test_reward_functions.py +++ b/src/tests/utils/test_reward_functions.py @@ -27,9 +27,7 @@ def weight_fn(num_balls: int, current_step: int, max_steps: int | None) -> float class TestRewardFunctions(unittest.TestCase): - possible_shot_action = np.array( - [0, 0, ff.TableState.MAX_THETA - 0.001, 0, 0], dtype=np.float64 - ) + possible_shot_action = np.array([0, 0, ff.TableState.MAX_THETA - 0.001, 0, 0], dtype=np.float64) impossible_shot_action = np.array([0, 0, 0, 0, 0], dtype=np.float64) empty_action = np.array([], dtype=np.float64) @@ -40,12 +38,8 @@ def test_step_pocketed_reward(self): reward = StepPocketedReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 0 - ) - self.assertEqual( - reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 0) + self.assertEqual(reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1) def test_game_won_reward(self): table_state = create_table_state(3) @@ -58,12 +52,8 @@ def test_game_won_reward(self): reward = GameWonReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 0 - ) - self.assertEqual( - reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 0) + self.assertEqual(reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1) def test_constant_reward(self): table_state = create_table_state(2) @@ -71,12 +61,8 @@ def test_constant_reward(self): reward = ConstantReward(weight=weight_fn, max_episode_steps=10) reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 2 + 1 + 10 - ) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 2 + 2 + 10 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 2 + 1 + 10) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 2 + 2 + 10) def test_cue_ball_pocketed_reward(self): table_state = create_table_state(2) @@ -86,12 +72,8 @@ def test_cue_ball_pocketed_reward(self): reward = CueBallPocketedReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 0 - ) - self.assertEqual( - reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 0) + self.assertEqual(reward.get_reward(table_state, table_state_pocketed, self.empty_action), 1) def test_cue_ball_not_moved_reward(self): table_state = create_table_state(2) @@ -101,27 +83,17 @@ def test_cue_ball_not_moved_reward(self): reward = CueBallNotMovedReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 1 - ) - self.assertEqual( - reward.get_reward(table_state, table_state_moved, self.empty_action), 0 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 1) + self.assertEqual(reward.get_reward(table_state, table_state_moved, self.empty_action), 0) def test_impossible_shot_reward(self): table_state = create_table_state(2) reward = ImpossibleShotReward() reward.reset(table_state) - self.assertEqual( - reward.get_reward(table_state, table_state, self.possible_shot_action), 0 - 
) - self.assertEqual( - reward.get_reward(table_state, table_state, self.impossible_shot_action), 1 - ) - self.assertEqual( - reward.get_reward(table_state, table_state, self.empty_action), 1 - ) + self.assertEqual(reward.get_reward(table_state, table_state, self.possible_shot_action), 0) + self.assertEqual(reward.get_reward(table_state, table_state, self.impossible_shot_action), 1) + self.assertEqual(reward.get_reward(table_state, table_state, self.empty_action), 1) def test_delta_best_total_distance_reward(self): # Pocket: [0., 1.118] @@ -133,9 +105,7 @@ def test_delta_best_total_distance_reward(self): table_state = create_table_state(2) table_state.setBall(1, ff.Ball.STATIONARY, ff.Point(0.25, 1.118)) - self.assertEqual( - reward.get_reward(prev_table_state, prev_table_state, self.empty_action), 0 - ) + self.assertEqual(reward.get_reward(prev_table_state, prev_table_state, self.empty_action), 0) self.assertEqual( reward.get_reward(prev_table_state, table_state, self.empty_action), 0.25, @@ -185,9 +155,7 @@ def test_combined_reward(self): table_state.setBall(1, ff.Ball.STATIONARY, ff.Point(0.25, 1.118)) self.assertEqual( - reward_function.get_reward( - prev_table_state, table_state, self.empty_action - ), + reward_function.get_reward(prev_table_state, table_state, self.empty_action), 3.3, ) @@ -196,22 +164,16 @@ def test_binary_reward_no_short_circuit(self): ImpossibleShotReward(10, short_circuit=False), ConstantReward(5), ] - reward_function = CombinedReward( - reward_functions=rewards_functions, short_circuit=True - ) + reward_function = CombinedReward(reward_functions=rewards_functions, short_circuit=True) table_state = create_table_state(2) reward_function.reset(table_state) self.assertEqual( - reward_function.get_reward( - table_state, table_state, self.possible_shot_action - ), + reward_function.get_reward(table_state, table_state, self.possible_shot_action), 5, ) self.assertEqual( - reward_function.get_reward( - table_state, table_state, self.impossible_shot_action - ), + reward_function.get_reward(table_state, table_state, self.impossible_shot_action), 15, ) @@ -220,30 +182,22 @@ def test_binary_reward_short_circuit(self): ImpossibleShotReward(10, short_circuit=True), ConstantReward(5), ] - reward_function = CombinedReward( - reward_functions=rewards_functions, short_circuit=True - ) + reward_function = CombinedReward(reward_functions=rewards_functions, short_circuit=True) table_state = create_table_state(2) reward_function.reset(table_state) self.assertEqual( - reward_function.get_reward( - table_state, table_state, self.possible_shot_action - ), + reward_function.get_reward(table_state, table_state, self.possible_shot_action), 5, ) self.assertEqual( - reward_function.get_reward( - table_state, table_state, self.impossible_shot_action - ), + reward_function.get_reward(table_state, table_state, self.impossible_shot_action), 10, ) def test_weights(self): table_state = create_table_state(3) - reward = ConstantReward( - weight=NegativeConstantWeightMaxSteps, max_episode_steps=10 - ) + reward = ConstantReward(weight=NegativeConstantWeightMaxSteps, max_episode_steps=10) reward.reset(table_state) self.assertEqual( From 1558c51779b9af2a443191c4904128703a2e4a2c Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 23:02:10 +0200 Subject: [PATCH 27/44] Rename test --- src/tests/utils/test_features.py | 27 --------------------------- src/tests/utils/test_wrappers.py | 22 ++++++++++++++++++++++ 2 files changed, 22 insertions(+), 27 deletions(-) delete mode 100644 
src/tests/utils/test_features.py create mode 100644 src/tests/utils/test_wrappers.py diff --git a/src/tests/utils/test_features.py b/src/tests/utils/test_features.py deleted file mode 100644 index 8a8e2e5..0000000 --- a/src/tests/utils/test_features.py +++ /dev/null @@ -1,27 +0,0 @@ -import unittest -from fastfiz_env.wrappers.utils import deg_to_vec, vec_to_deg, vec_to_abs_deg -import numpy as np - - -class TestFeatures(unittest.TestCase): - def test_deg_to_vec(self): - self.assertTrue(np.allclose(deg_to_vec(0), [1, 0])) - self.assertTrue(np.allclose(deg_to_vec(90), [0, 1])) - self.assertTrue(np.allclose(deg_to_vec(180), [-1, 0])) - self.assertTrue(np.allclose(deg_to_vec(270), [0, -1])) - - def test_vec_to_abs_deg(self): - self.assertEqual(vec_to_abs_deg([1, 0]), 0) - self.assertEqual(vec_to_abs_deg([0, 1]), 90) - self.assertEqual(vec_to_abs_deg([-1, 0]), 180) - self.assertEqual(vec_to_abs_deg([0, -1]), 270) - - def test_vec_to_deg(self): - self.assertEqual(vec_to_deg([1, 0]), 0) - self.assertEqual(vec_to_deg([0, 1]), 90) - self.assertEqual(vec_to_deg([-1, 0]), 180) - self.assertEqual(vec_to_deg([0, -1]), -90) - - -if __name__ == "__main__": - unittest.main() diff --git a/src/tests/utils/test_wrappers.py b/src/tests/utils/test_wrappers.py new file mode 100644 index 0000000..99062d3 --- /dev/null +++ b/src/tests/utils/test_wrappers.py @@ -0,0 +1,22 @@ +import unittest +from fastfiz_env.wrappers.utils import cart2sph, sph2deg + + +class TestFeatures(unittest.TestCase): + def test_cart2sph(self): + x, y, z = 1, 1, 1 + az, el, r = cart2sph(x, y, z) + self.assertAlmostEqual(az, 0.7853981633974483) + self.assertAlmostEqual(el, 0.6154797086703873) + self.assertAlmostEqual(r, 1.7320508075688772) + + def test_sph2deg(self): + az, el, r = 0.7853981633974483, 0.6154797086703873, 1.7320508075688772 + phi, theta, r = sph2deg(az, el, r) + self.assertAlmostEqual(phi, 45.0) + self.assertAlmostEqual(theta, 35.26438968275466) + self.assertAlmostEqual(r, 1.7320508075688772) + + +if __name__ == "__main__": + unittest.main() From 6e627a276e7b3efc09dab9fcd3cd90cfddc5abd6 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 23:02:24 +0200 Subject: [PATCH 28/44] Remove unused utils --- src/fastfiz_env/wrappers/action.py | 2 +- src/fastfiz_env/wrappers/utils.py | 36 ------------------------------ 2 files changed, 1 insertion(+), 37 deletions(-) diff --git a/src/fastfiz_env/wrappers/action.py b/src/fastfiz_env/wrappers/action.py index 7e4dedc..7db9aa9 100644 --- a/src/fastfiz_env/wrappers/action.py +++ b/src/fastfiz_env/wrappers/action.py @@ -1,4 +1,4 @@ -from .utils import vec_to_abs_deg, cart2sph, sph2deg +from .utils import cart2sph, sph2deg from gymnasium import ActionWrapper from gymnasium import spaces import numpy as np diff --git a/src/fastfiz_env/wrappers/utils.py b/src/fastfiz_env/wrappers/utils.py index 9a89002..caa277e 100644 --- a/src/fastfiz_env/wrappers/utils.py +++ b/src/fastfiz_env/wrappers/utils.py @@ -1,42 +1,6 @@ import numpy as np -def deg_to_vec(deg: float) -> np.ndarray: - """ - Gets the vector of an angle. - """ - rad = np.deg2rad(deg) - return np.array([np.cos(rad), np.sin(rad)], dtype=np.float32) - - -def vec_to_deg(vec: np.ndarray) -> float: - """ - Gets the angle of a vector. - """ - return float(np.rad2deg(np.arctan2(vec[1], vec[0]))) - - -def vec_to_abs_deg(vec: np.ndarray) -> float: - """ - Gets the absolute angle of a vector. - """ - return vec_to_deg(vec) % 360 - - -def vec_mag(vec: np.ndarray) -> float: - """ - Gets the length of a vector. 
- """ - return float(np.linalg.norm(vec)) - - -def vec_norm(vec: np.ndarray) -> np.ndarray: - """ - Gets the unit vector of a vector. - """ - return vec / vec_mag(vec) - - def cart2sph(x: float, y: float, z: float) -> tuple[float, float, float]: """ Convert Cartesian coordinates to spherical coordinates. From f57da5ae428c239a8a2479bd08b99329acb8c620 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 23:25:03 +0200 Subject: [PATCH 29/44] Fix workflow --- .github/workflows/{lint.yml => ci.yml} | 19 ++++++++---- .github/workflows/python-package.yml | 40 -------------------------- 2 files changed, 14 insertions(+), 45 deletions(-) rename .github/workflows/{lint.yml => ci.yml} (66%) delete mode 100644 .github/workflows/python-package.yml diff --git a/.github/workflows/lint.yml b/.github/workflows/ci.yml similarity index 66% rename from .github/workflows/lint.yml rename to .github/workflows/ci.yml index 0405eb7..5d16d87 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/ci.yml @@ -1,12 +1,11 @@ -name: Lint Python package - +name: Python package on: push: pull_request: branches: ['main'] jobs: - lint: + build: runs-on: ubuntu-latest strategy: fail-fast: false @@ -14,7 +13,6 @@ jobs: python-version: ['3.10', '3.11'] steps: - uses: actions/checkout@v4 - - uses: chartboost/ruff-action@v1 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: @@ -22,9 +20,20 @@ jobs: cache: 'pip' - name: Install dependencies run: | - sudo apt-get install python3-opengl + sudo apt update + sudo apt install python3-opengl swig libgsl-dev python -m pip install --upgrade pip pip install ".[test]" + + - name: Lint with Ruff + run: | + ruff check src/fastfiz_env + ruff format src/fastfiz_env + - name: Run MyPy run: | mypy src/fastfiz_env + + - name: Test with pytest + run: | + pytest diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml deleted file mode 100644 index cbbbaa6..0000000 --- a/.github/workflows/python-package.yml +++ /dev/null @@ -1,40 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python - -name: Python package - -on: - push: - pull_request: - branches: ['main'] - -jobs: - build: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: ['3.10', '3.11'] - - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - - name: Install dependencies - run: | - sudo apt-get install python3-opengl - python -m pip install --upgrade pip - python -m pip install flake8 pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest - run: | - pytest From b2d3ae3f53f96ca98d620bb78bc0fd6de4ba8244 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 23:25:54 +0200 Subject: [PATCH 30/44] Fix workflow --- .github/workflows/test.yml | 34 ---------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml deleted file mode 100644 index 7287df4..0000000 --- a/.github/workflows/test.yml +++ /dev/null @@ -1,34 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python - -name: Test Python package - -on: - push: - pull_request: - branches: ['main'] - -jobs: - test: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: ['3.10', '3.11'] - - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - - name: Install dependencies - run: | - sudo apt update - sudo apt install python3-opengl swig libgsl-dev - python -m pip install --upgrade pip - pip install ".[test]" - - name: Test with pytest - run: | - pytest From 59ebf1a893e9e3a8866410fe4e2e2294d9129dfd Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 23:35:01 +0200 Subject: [PATCH 31/44] Remove requirements.txt --- requirements.txt | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index ab5aac4..0000000 --- a/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -fastfiz @ git+https://github.com/P6-Pool/fastfiz.git@2af8aed22bec1faeb5ac92b98b0751a0023f3fb7 -fastfiz_renderer @ git+https://github.com/P6-Pool/fastfiz-renderer.git@4ffb95e8683b30975b1d8f5dd483d56599e1e062 -p5 @ git+https://github.com/P6-Pool/p5.git@19b96bc00be29d624982c6ecee23fba104457277 -gymnasium==0.29.1 -stable-baselines3==2.2.1 -tensorboard==2.16.2 -tensorflow==2.16.1 -vectormath==0.2.2 -optuna==3.6.1 From 72fdd2cf6772944a5a7314439ae4a3684060a068 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 23:37:51 +0200 Subject: [PATCH 32/44] Remove .vscode --- .vscode/settings.json | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 1b59914..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "python.analysis.typeCheckingMode": "basic", - "python.analysis.autoImportCompletions": true -} From f418e1e387815c993a88f3c7550a69bf3631806b Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Thu, 18 Apr 2024 23:53:37 +0200 Subject: [PATCH 33/44] Remove unused --- src/tes.ipynb | 260 -------------------------------------------------- 1 file changed, 260 deletions(-) delete mode 100644 src/tes.ipynb diff --git a/src/tes.ipynb b/src/tes.ipynb deleted file mode 100644 index 507ccab..0000000 --- a/src/tes.ipynb +++ /dev/null @@ -1,260 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "def vec_length(vec: 
np.ndarray) -> float:\n", - " \"\"\"\n", - " Gets the length of a vector.\n", - " \"\"\"\n", - " return float(np.linalg.norm(vec))" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [], - "source": [ - "def spherical_coordinates(vector: np.ndarray) -> tuple[float, float, float]:\n", - " \"\"\"\n", - " Converts a vector to spherical coordinates.\n", - "\n", - " Returns:\n", - " r: float - The magnitude of the vector.\n", - " theta: float - The angle from the z-axis.\n", - " phi: float - The angle in the xy-plane.\n", - " \"\"\"\n", - " assert len(vector) == 3, \"Vector must have excatly 3 components.\"\n", - " Vx, Vy, Vz = vector\n", - " r = vec_length(vector)\n", - " theta = np.rad2deg(np.arccos(Vz / r))\n", - " phi = np.arctan2(Vy, Vx) # Using arctan2 to get correct quadrant\n", - "\n", - " # phi = np.rad2deg((phi + 2 * np.pi) % (2 * np.pi))\n", - "\n", - " return r, theta, np.rad2deg(phi)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "def cart2sph(x, y, z):\n", - " hxy = np.hypot(x, y)\n", - " r = np.hypot(hxy, z)\n", - " el = np.arctan2(z, hxy)\n", - " az = np.arctan2(y, x)\n", - " return az, el, r" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [], - "source": [ - "def cart2params(x, y, z):\n", - " az, el, r = cart2sph(x, y, z)\n", - " az = np.rad2deg(az)\n", - " el = np.rad2deg(el)\n", - " return r, el, az" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [], - "source": [ - "def cartesian_to_spherical(x, y, z):\n", - " r = np.sqrt(x**2 + y**2 + z**2)\n", - " theta = np.rad2deg(np.arccos(z / r))\n", - " phi = np.rad2deg(np.arctan2(y, x))\n", - " return r, theta, phi" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(1.044030650891055, 90.0, 16.69924423399362)\n", - "(1.044030650891055, 90.0, 16.69924423399362)\n", - "(1.044030650891055, 0.0, 16.69924423399362)\n" - ] - } - ], - "source": [ - "vec = np.array([1, 0.30, 0])\n", - "print(spherical_coordinates(vec))\n", - "print(cartesian_to_spherical(*vec))\n", - "print(cart2params(*vec))\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(1.4142135623730951, 45.00000000000001, 0.0)" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "spherical_coordinates(vec)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(0.0, 0.7853981633974483, 1.4142135623730951)" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cart2sph(*vec)" - ] - }, - { - "cell_type": "code", - "execution_count": 155, - "metadata": {}, - "outputs": [], - "source": [ - "def cart2sph(x: float, y: float, z: float) -> tuple[float, float, float]:\n", - " \"\"\"\n", - " Convert Cartesian coordinates to spherical coordinates.\n", - "\n", - " Args:\n", - " x (float): x-coordinate.\n", - " y (float): y-coordinate.\n", - " z (float): z-coordinate.\n", - "\n", - " Returns:\n", - " tuple[float, float, float]: A tuple containing azimuth angle (in degrees), elevation angle (in degrees), and radius.\n", - " \"\"\"\n", - " hxy: float = np.hypot(x, y)\n", - " r: float = np.hypot(hxy, z)\n", - " el: 
float = np.arctan2(z, hxy)\n", - " az: float = np.arctan2(y, x)\n", - " return az, el, r\n", - "\n", - "\n", - "def sph2deg(az: float, el: float, r: float) -> tuple[float, float, float]:\n", - " \"\"\"\n", - " Convert spherical coordinates to degrees.\n", - "\n", - " Args:\n", - " az (float): Azimuth angle in radians.\n", - " el (float): Elevation angle in radians.\n", - " r (float): Radius.\n", - "\n", - " Returns:\n", - " tuple[float, float, float]: A tuple containing elevation angle (in degrees), azimuth angle (in degrees), and radius.\n", - " \"\"\"\n", - " theta: float = np.rad2deg(el) % 180\n", - " phi: float = np.rad2deg(az) % 360\n", - " return theta, phi, r" - ] - }, - { - "cell_type": "code", - "execution_count": 161, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(35.264389682754654, 45.0, 346.41016151377545)" - ] - }, - "execution_count": 161, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vec = np.array([200, 200, 200])\n", - "cart2sph(*vec)\n", - "sph2deg(*cart2sph(*vec))" - ] - }, - { - "cell_type": "code", - "execution_count": 123, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "180.0" - ] - }, - "execution_count": 123, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.rad2deg(np.pi)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 9bde5bd40edcb1dbff374604fd595a671ccd1d7d Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Tue, 23 Apr 2024 23:06:17 +0200 Subject: [PATCH 34/44] Fix cart2sph --- src/fastfiz_env/wrappers/action.py | 8 ++++---- src/fastfiz_env/wrappers/utils.py | 23 +++++++++++------------ src/tests/utils/test_wrappers.py | 14 +++++++------- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/fastfiz_env/wrappers/action.py b/src/fastfiz_env/wrappers/action.py index 7db9aa9..149ff1e 100644 --- a/src/fastfiz_env/wrappers/action.py +++ b/src/fastfiz_env/wrappers/action.py @@ -133,10 +133,10 @@ def action(self, action: np.ndarray[float, np.dtype[np.float32]]) -> np.ndarray[ ) case ActionSpaces.VECTOR_3D: x, y, z = action - az, el, r = cart2sph(x, y, z) - phi, theta, r = sph2deg(az, el, r) - theta = float(np.interp(theta, (0, np.rad2deg(np.pi)), (self.MIN_THETA, self.MAX_THETA))) - phi = float(np.interp(phi, (0, np.rad2deg(2 * np.pi)), (self.MIN_PHI, self.MAX_PHI))) + r, el, az = cart2sph(x, y, z) + r, theta, phi = sph2deg(r, el, az) + phi = float(np.interp(az, (0, np.rad2deg(np.pi)), (self.MIN_PHI, self.MAX_PHI))) + theta = float(np.interp(el, (-np.rad2deg(np.pi), np.rad2deg(np.pi)), (self.MIN_THETA, self.MAX_THETA))) velocity = float(np.interp(r, (0, np.sqrt(3)), (self.MIN_VELOCITY, self.MAX_VELOCITY))) case ActionSpaces.NORM_3D: theta = float(np.interp(action[0], (-1, 1), (self.MIN_THETA, self.MAX_THETA))) diff --git a/src/fastfiz_env/wrappers/utils.py b/src/fastfiz_env/wrappers/utils.py index caa277e..0bfe321 100644 --- a/src/fastfiz_env/wrappers/utils.py +++ b/src/fastfiz_env/wrappers/utils.py @@ -11,27 +11,26 @@ def cart2sph(x: float, y: float, z: float) -> tuple[float, float, float]: z (float): z-coordinate. 
Returns: - tuple[float, float, float]: A tuple containing azimuth angle (in degrees), elevation angle (in degrees), and radius. + tuple[float, float, float]: A tuple containing radius (magnitude), elevation angle (theta, in radians), and azimuth angle (phi, in radians). """ - hxy: float = np.hypot(x, y) - r: float = np.hypot(hxy, z) - el: float = np.arctan2(z, hxy) + r = np.sqrt(x**2 + y**2 + z**2) + el: float = np.arccos(z / r) az: float = np.arctan2(y, x) - return az, el, r + return r, el, az -def sph2deg(az: float, el: float, r: float) -> tuple[float, float, float]: +def sph2deg(r: float, el: float, az: float) -> tuple[float, float, float]: """ Convert spherical coordinates to degrees. Args: - az (float): Azimuth angle in radians. - el (float): Elevation angle in radians. r (float): Radius. + el (float): Elevation angle in radians. + az (float): Azimuth angle in radians. Returns: - tuple[float, float, float]: A tuple containing azimuth angle (phi, in degrees), elevation angle (theta, in degrees), and radius. + tuple[float, float, float]: A tuple containing radius (magnitude), elevation angle (theta, in degrees), and azimuth angle (phi, in degrees). """ - phi: float = np.rad2deg(az % (2 * np.pi)) - theta: float = np.rad2deg(el % np.pi) - return phi, theta, r + theta: float = np.rad2deg(el) + phi: float = np.rad2deg(az) + return r, theta, phi diff --git a/src/tests/utils/test_wrappers.py b/src/tests/utils/test_wrappers.py index 99062d3..8cf83a1 100644 --- a/src/tests/utils/test_wrappers.py +++ b/src/tests/utils/test_wrappers.py @@ -5,17 +5,17 @@ class TestFeatures(unittest.TestCase): def test_cart2sph(self): x, y, z = 1, 1, 1 - az, el, r = cart2sph(x, y, z) - self.assertAlmostEqual(az, 0.7853981633974483) - self.assertAlmostEqual(el, 0.6154797086703873) + r, el, az = cart2sph(x, y, z) self.assertAlmostEqual(r, 1.7320508075688772) + self.assertAlmostEqual(el, 0.9553166181245092) + self.assertAlmostEqual(az, 0.7853981633974483) def test_sph2deg(self): - az, el, r = 0.7853981633974483, 0.6154797086703873, 1.7320508075688772 - phi, theta, r = sph2deg(az, el, r) - self.assertAlmostEqual(phi, 45.0) - self.assertAlmostEqual(theta, 35.26438968275466) + r, el, az = 1.7320508075688772, 0.9553166181245092, 0.7853981633974483 + r, theta, phi = sph2deg(r, el, az) self.assertAlmostEqual(r, 1.7320508075688772) + self.assertAlmostEqual(theta, 54.735610317245346) + self.assertAlmostEqual(phi, 45.00) if __name__ == "__main__": From 1baf5eba91b4433f978407aa7c60bb45963fe5f1 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Tue, 23 Apr 2024 23:06:30 +0200 Subject: [PATCH 35/44] Add plot script --- src/plot.py | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 src/plot.py diff --git a/src/plot.py b/src/plot.py new file mode 100644 index 0000000..8ba20ed --- /dev/null +++ b/src/plot.py @@ -0,0 +1,106 @@ +import numpy as np +from tensorboard.backend.event_processing.event_accumulator import EventAccumulator +import matplotlib.pyplot as plt +import argparse + + +def smooth(scalars: list[float] | np.ndarray, weight: float) -> list[float]: + """ + EMA implementation according to + https://github.com/tensorflow/tensorboard/blob/34877f15153e1a2087316b9952c931807a122aa7/tensorboard/components/vz_line_chart2/line-chart.ts#L699 + """ + last = 0 + smoothed = [] + num_acc = 0 + for next_val in scalars: + last = last * weight + (1 - weight) * next_val + num_acc += 1 + # de-bias + debias_weight = 1 + if weight != 1: + debias_weight = 1 - (weight**num_acc) + 
smoothed_val = last / debias_weight + smoothed.append(smoothed_val) + + return smoothed + + +def plot_tensorboard_logs(log_dirs, tags_to_plot, smooth_weight=0.5): + """ + Plot TensorBoard logs for specified tags with optional smoothing. + + Args: + - log_dir (str): Path to the directory containing TensorBoard logs. + - tags_to_plot (list): List of tags to plot. + - smooth_weight (int, optional): Window size for moving average smoothing. Default is 0.5. + + Returns: + - None + """ + + # Determine colors for lines + colors = plt.cm.Set2(np.linspace(0, 1, len(tags_to_plot))) # type: ignore + line_width = 0.6 + + for log_dir in log_dirs: + # Load TensorBoard logs + event_acc = EventAccumulator(log_dir) + event_acc.Reload() + + # Get all scalar events + scalar_tags = event_acc.Tags()["scalars"] + + # Load TensorBoard logs + event_acc = EventAccumulator(log_dir) + event_acc.Reload() + + # Plot specified tags + for i, tag in enumerate(tags_to_plot): + if tag in scalar_tags: + events = event_acc.Scalars(tag) + steps = np.array([event.step for event in events]) + values = np.array([event.value for event in events]) + + if smooth_weight > 0: + # Apply moving average smoothing + smoothed_values = smooth(values, smooth_weight) + + # Plot smoothed data with custom color + plt.plot( + steps, + smoothed_values, + label=tag + f" ({smooth_weight} smoothing)", + color=colors[i], + linewidth=line_width, + ) + + # Plot original data with lower opacity using the same color + plt.plot(steps, values, alpha=0.3, color=colors[i], label=tag, linewidth=line_width) + else: + plt.plot(steps, values, color=colors[i], label=tag, linewidth=line_width) + else: + print(f"Tag '{tag}' not found in TensorBoard logs.") + + plt.grid(True, alpha=0.1) + plt.xlabel("Step") + plt.ylabel("Value") + plt.legend() + plot_name = "plot-" + "-".join(tags_to_plot).replace("/", "_") + ".pdf" + plt.savefig(plot_name) + print(f"Plot saved as '{plot_name}'") + plt.show() + + +def main(): + parser = argparse.ArgumentParser(description="Plot TensorBoard logs.") + parser.add_argument("log_dirs", nargs="+", help="Path(s) to the directory containing TensorBoard logs") + parser.add_argument("-t", "--tags", nargs="+", help="Scalar tags to plot", required=True) + parser.add_argument("-s", "--smoothing", type=float, default=0, help="Window size for moving average smoothing") + + args = parser.parse_args() + + plot_tensorboard_logs(args.log_dirs, args.tags, smooth_weight=args.smoothing) + + +if __name__ == "__main__": + main() From da3649157427d1ef7450a50cc2495d73438fe776 Mon Sep 17 00:00:00 2001 From: Mads Risager Date: Wed, 24 Apr 2024 12:19:13 +0200 Subject: [PATCH 36/44] Remove velocity from reward --- src/fastfiz_env/reward_functions/default_reward.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/fastfiz_env/reward_functions/default_reward.py b/src/fastfiz_env/reward_functions/default_reward.py index fc0ce87..b439514 100644 --- a/src/fastfiz_env/reward_functions/default_reward.py +++ b/src/fastfiz_env/reward_functions/default_reward.py @@ -8,7 +8,7 @@ NegativeConstantWeightMaxSteps, ConstantWeight, NegativeConstantWeight, - ExponentialVelocityReward, + # ExponentialVelocityReward, BallsNotMovedReward, ) @@ -19,7 +19,6 @@ ConstantReward(NegativeConstantWeightMaxSteps), BallsNotMovedReward(NegativeConstantWeightMaxSteps), StepPocketedReward(ConstantWeightBalls), - ExponentialVelocityReward(NegativeConstantWeight), ] DefaultReward = CombinedReward(reward_functions=rewards, short_circuit=True) @@ -32,7 +31,6 @@ - 
ConstantReward: -1 / max_episode_steps
 - BallsNotMovedReward: -1 / max_episode_steps
 - StepPocketedReward: 1 / (num_balls - 1)
-- ExponentialVelocityReward: -1

 Returns:
     CombinedReward: The default reward function.

From 26b4cf21b602f44299b7bcf0ec2c3f842571ed80 Mon Sep 17 00:00:00 2001
From: Mads Risager
Date: Wed, 24 Apr 2024 12:19:28 +0200
Subject: [PATCH 37/44] Fix calc and conversion

---
 src/fastfiz_env/wrappers/action.py |  6 +++---
 src/fastfiz_env/wrappers/utils.py  | 11 ++++++-----
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/fastfiz_env/wrappers/action.py b/src/fastfiz_env/wrappers/action.py
index 149ff1e..6cfb4cc 100644
--- a/src/fastfiz_env/wrappers/action.py
+++ b/src/fastfiz_env/wrappers/action.py
@@ -134,9 +134,9 @@ def action(self, action: np.ndarray[float, np.dtype[np.float32]]) -> np.ndarray[
             case ActionSpaces.VECTOR_3D:
                 x, y, z = action
                 r, el, az = cart2sph(x, y, z)
-                r, theta, phi = sph2deg(r, el, az)
-                phi = float(np.interp(az, (0, np.rad2deg(np.pi)), (self.MIN_PHI, self.MAX_PHI)))
-                theta = float(np.interp(el, (-np.rad2deg(np.pi), np.rad2deg(np.pi)), (self.MIN_THETA, self.MAX_THETA)))
+                r, el, az = sph2deg(r, el, az)
+                phi = float(np.interp(az, (0, 360), (self.MIN_PHI, self.MAX_PHI)))
+                theta = float(np.interp(el, (0, 180), (self.MIN_THETA, self.MAX_THETA)))
                 velocity = float(np.interp(r, (0, np.sqrt(3)), (self.MIN_VELOCITY, self.MAX_VELOCITY)))
             case ActionSpaces.NORM_3D:
                 theta = float(np.interp(action[0], (-1, 1), (self.MIN_THETA, self.MAX_THETA)))
diff --git a/src/fastfiz_env/wrappers/utils.py b/src/fastfiz_env/wrappers/utils.py
index 0bfe321..0080d79 100644
--- a/src/fastfiz_env/wrappers/utils.py
+++ b/src/fastfiz_env/wrappers/utils.py
@@ -14,8 +14,9 @@ def cart2sph(x: float, y: float, z: float) -> tuple[float, float, float]:
         tuple[float, float, float]: A tuple containing radius (magnitude), elevation angle (theta, in radians), and azimuth angle (phi, in radians).
     """
     r = np.sqrt(x**2 + y**2 + z**2)
-    el: float = np.arccos(z / r)
-    az: float = np.arctan2(y, x)
+    el = np.arccos(z / r)
+    az = (np.arctan2(y, x) + 2 * np.pi) % (2 * np.pi)  # Using arctan2 to get correct quadrant
+
     return r, el, az
 
 
@@ -31,6 +32,6 @@ def sph2deg(r: float, el: float, az: float) -> tuple[float, float, float]:
     Returns:
         tuple[float, float, float]: A tuple containing radius (magnitude), elevation angle (theta, in degrees), and azimuth angle (phi, in degrees).
     """
-    theta: float = np.rad2deg(el)
-    phi: float = np.rad2deg(az)
-    return r, theta, phi
+    el: float = np.rad2deg(el)
+    az: float = np.rad2deg(az)
+    return r, el, az

From ee54e636b563064ae41970c13376925248855fd4 Mon Sep 17 00:00:00 2001
From: Mads Risager
Date: Thu, 25 Apr 2024 11:49:34 +0200
Subject: [PATCH 38/44] Add action_space_id option

---
 src/fastfiz_env/make.py | 8 +++++---
 src/train.py            | 7 ++++++-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/fastfiz_env/make.py b/src/fastfiz_env/make.py
index 7fa8b2a..2f2f30d 100644
--- a/src/fastfiz_env/make.py
+++ b/src/fastfiz_env/make.py
@@ -43,6 +43,7 @@ def make_wrapped_env(
     num_balls: int,
     max_episode_steps: int,
     reward_function: RewardFunction,
+    action_space_id: ActionSpaces,
     **kwargs,
 ):
     """
@@ -56,7 +57,7 @@ def make_wrapped_env(
         disable_env_checker=False,
         **kwargs,
     )
-    env = FastFizActionWrapper(env, action_space_id=ActionSpaces.VECTOR_3D)
+    env = FastFizActionWrapper(env, action_space_id=action_space_id)
     return env
 
 
@@ -64,7 +65,8 @@ def make_callable_wrapped_env(
     env_id: str,
     num_balls: int,
     max_episode_steps: int,
-    reward_function: RewardFunction,
+    reward_function: RewardFunction = DefaultReward,
+    action_space_id: ActionSpaces = ActionSpaces.VECTOR_3D,
     **kwargs,
 ):
     """
@@ -73,6 +75,6 @@ def make_callable_wrapped_env(
     """
 
     def _init() -> gym.Env:
-        return make_wrapped_env(env_id, num_balls, max_episode_steps, reward_function, **kwargs)
+        return make_wrapped_env(env_id, num_balls, max_episode_steps, reward_function, action_space_id, **kwargs)
 
     return _init
diff --git a/src/train.py b/src/train.py
index 7157dcd..121d5b5 100644
--- a/src/train.py
+++ b/src/train.py
@@ -13,6 +13,7 @@
     CallbackList,
 )
 from stable_baselines3.common.env_util import make_vec_env
+from fastfiz_env.wrappers.action import ActionSpaces
 
 from hyperparams import params_to_kwargs
 
@@ -40,11 +41,12 @@ def train(
     logs_path: str = "logs/",
     models_path: str = "models/",
     reward_function: RewardFunction = DefaultReward,
+    action_space_id: ActionSpaces = ActionSpaces.VECTOR_3D,
     callbacks=None,
     params: Optional[dict] = None,
 ) -> None:
     env = make_vec_env(
-        make_callable_wrapped_env(env_id, num_balls, max_episode_steps, reward_function),
+        make_callable_wrapped_env(env_id, num_balls, max_episode_steps, reward_function, action_space_id=action_space_id),
         n_envs=n_envs,
     )
 
@@ -122,6 +124,8 @@
         help="Path to hyperparameters file (file must have key 'params' with dict of hyperparameters",
     )
 
+    parser.add_argument("-a", "--action_id", type=ActionSpaces, choices=list(ActionSpaces), default=ActionSpaces.VECTOR_3D)
+
     args = parser.parse_args()
 
     reward_function = DefaultReward if args.reward == "DefaultReward" else WinningReward
@@ -164,5 +168,6 @@
         logs_path=logs_path,
         models_path=models_path,
         reward_function=reward_function,
+        action_space_id=args.action_id,
         params=params,
     )

From c17989b05bd3fa996e51bf9b66ac8bc5856f2ad5 Mon Sep 17 00:00:00 2001
From: Mads Risager
Date: Thu, 25 Apr 2024 11:55:40 +0200
Subject: [PATCH 39/44] Fix model name

---
 src/train.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/train.py b/src/train.py
index 121d5b5..aadc06f 100644
--- a/src/train.py
+++ b/src/train.py
@@ -27,8 +27,8 @@ def get_latest_run_id(log_path: str, name: str) -> int:
     return id
 
 
-def get_model_name(env_name: str, balls: int, algo: str = "PPO") -> str:
-    return f"{env_name.split('FastFiz-v0')[0]}-{balls}_balls-{algo}".lower()
+def get_model_name(env_name: str, balls: int, algo: str = "PPO", action_space_id=ActionSpaces.VECTOR_3D) -> str:
+    return f"{env_name.split('FastFiz-v0')[0]}-{balls}_balls-{action_space_id.name}-{algo}".lower()
 
 
 def train(
@@ -52,7 +52,7 @@ def train(
     hyperparams = params_to_kwargs(**params) if params else {}
     print(hyperparams)
 
-    model_name = get_model_name(env_id, num_balls)
+    model_name = get_model_name(env_id, num_balls, action_space_id)
 
     if model_dir is None:
         model = PPO("MlpPolicy", env, verbose=1, tensorboard_log=logs_path, **hyperparams)
@@ -156,7 +156,8 @@
         model_path: {model_path}\n\
         logs_path: {logs_path}\n\
         models_path: {models_path}\n\
-        reward_function: {reward}\n"
+        reward_function: {reward}\n\
+        action_space_id: {args.action_id}\n"
     )
 
     train(

From 20b71da1c4eb08d749de3774e778ae5e390f892a Mon Sep 17 00:00:00 2001
From: Mads Risager
Date: Tue, 30 Apr 2024 14:23:16 +0200
Subject: [PATCH 40/44] Add script to log random policy evaluation metrics

---
 src/random_policy.py | 63 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 src/random_policy.py

diff --git a/src/random_policy.py b/src/random_policy.py
new file mode 100644
index 0000000..6c95116
--- /dev/null
+++ b/src/random_policy.py
@@ -0,0 +1,63 @@
+from torch.utils.tensorboard import SummaryWriter
+
+from stable_baselines3.common.env_util import make_vec_env
+from fastfiz_env.reward_functions.default_reward import DefaultReward
+from fastfiz_env.wrappers.action import ActionSpaces, FastFizActionWrapper
+import fastfiz_env
+
+
+# Create and wrap the environment
+# env = DummyVecEnv([lambda: gym.make("CartPole-v1")])
+env = fastfiz_env.make_callable_wrapped_env(
+    "PocketsFastFiz-v0",
+    max_episode_steps=20,
+    reward_function=DefaultReward,
+    action_space_id=ActionSpaces.VECTOR_3D,
+    num_balls=2,
+)
+
+env = make_vec_env(env, n_envs=1)
+
+total_timesteps = 8_000_000
+eval_freq = 50_000
+eval_episodes = 100
+total_runs = (total_timesteps // eval_freq) * eval_episodes
+# total_runs = 10000
+
+# Initialize the SummaryWriter
+writer = SummaryWriter(log_dir="logs/random_policy", comment="-random-policy")
+
+total_success = 0
+total_len = 0
+total_reward = 0
+for episode in range(total_runs):
+    obs = env.reset()
+
+    # Your policy rollout code here
+    done = False
+    while not done:
+        action = env.action_space.sample()  # Random policy
+        obs, reward, done, info = env.step([action])
+        total_len += 1
+        total_reward += reward
+
+    total_success += int(info[0]["is_success"])
+
+    # Log episode reward
+
+
+success_mean = total_success / total_runs
+episode_mean = total_len / total_runs
+rew_mean = total_reward / total_runs
+print(f"Success rate: {success_mean}")
+print(f"Mean episode length: {episode_mean}")
+print(f"Mean episode reward: {rew_mean}")
+for episode in range(1, total_timesteps + eval_freq - 1):
+    if episode % eval_freq == 0:
+        writer.add_scalar("eval/success_rate", success_mean, episode)
+        writer.add_scalar("eval/mean_reward", rew_mean, episode)
+        writer.add_scalar("eval/mean_ep_length", episode_mean, episode)
+
+# Close the environment and SummaryWriter
+env.close()
+writer.close()

From 8dd0213882ab069883aeaa69112a65b3651e0890 Mon Sep 17 00:00:00 2001
From: Mads Risager
Date: Tue, 30 Apr 2024 14:25:07 +0200
Subject: [PATCH 41/44] Reset with seed

---
 src/fastfiz_env/envs/pockets_fastfiz.py | 2 +-
 src/fastfiz_env/envs/simple_fastfiz.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/fastfiz_env/envs/pockets_fastfiz.py b/src/fastfiz_env/envs/pockets_fastfiz.py
index ae7c9bf..3cc789a 100644
--- a/src/fastfiz_env/envs/pockets_fastfiz.py
+++ b/src/fastfiz_env/envs/pockets_fastfiz.py
@@ -60,7 +60,7 @@ def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -
         if self.max_episode_steps is None or self.elapsed_steps is None:
             self._get_time_limit_attrs()
 
-        self.table_state = create_random_table_state(self.num_balls)
+        self.table_state = create_random_table_state(self.num_balls, seed=seed)
         self.reward.reset(self.table_state)
 
         self._prev_pocketed = 0
diff --git a/src/fastfiz_env/envs/simple_fastfiz.py b/src/fastfiz_env/envs/simple_fastfiz.py
index 3de9ddf..bc628c4 100644
--- a/src/fastfiz_env/envs/simple_fastfiz.py
+++ b/src/fastfiz_env/envs/simple_fastfiz.py
@@ -58,7 +58,7 @@ def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -
         if self.max_episode_steps is None or self.elapsed_steps is None:
             self._get_time_limit_attrs()
 
-        self.table_state = create_random_table_state(self.num_balls)
+        self.table_state = create_random_table_state(self.num_balls, seed=seed)
         self.reward.reset(self.table_state)
 
         self._prev_pocketed = 0

From f9fd52fc705ce8c3439d2c1b93e3f45078f3b4a6 Mon Sep 17 00:00:00 2001
From: Mads Risager
Date: Tue, 30 Apr 2024 14:25:40 +0200
Subject: [PATCH 42/44] Remove commit versions

---
 pyproject.toml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 63e4dba..1413b98 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,7 @@ readme = "README.md"
 requires-python = ">=3.10"
 dynamic = ["version"]
 dependencies = [
-    "fastfiz @ git+https://github.com/P6-Pool/fastfiz.git@2af8aed22bec1faeb5ac92b98b0751a0023f3fb7",
+    "fastfiz @ git+https://github.com/P6-Pool/fastfiz.git",
     "gymnasium",
     "numpy",
     "vectormath",
 ]
 
 [project.optional-dependencies]
 dev = [
-    "fastfiz_renderer @ git+https://github.com/P6-Pool/fastfiz-renderer.git@4ffb95e8683b30975b1d8f5dd483d56599e1e062",
+    "fastfiz_renderer @ git+https://github.com/P6-Pool/fastfiz-renderer.git",
     "stable-baselines3",
     "tqdm",
     "rich",
@@ -29,7 +29,7 @@ dev = [
 test = ["pytest", "mypy", "ruff"]
 all = [
     # dev
-    "fastfiz_renderer @ git+https://github.com/P6-Pool/fastfiz-renderer.git@4ffb95e8683b30975b1d8f5dd483d56599e1e062",
+    "fastfiz_renderer @ git+https://github.com/P6-Pool/fastfiz-renderer.git",
     "stable-baselines3",
     "tqdm",
     "rich",

From d29ed7833140842002dddbd2368645f2c1554468 Mon Sep 17 00:00:00 2001
From: Mads Risager
Date: Tue, 30 Apr 2024 14:28:28 +0200
Subject: [PATCH 43/44] Setup Latex plots

---
 src/plot.py | 84 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 69 insertions(+), 15 deletions(-)

diff --git a/src/plot.py b/src/plot.py
index 8ba20ed..b9bb25b 100644
--- a/src/plot.py
+++ b/src/plot.py
@@ -3,6 +3,15 @@
 import matplotlib.pyplot as plt
 import argparse
 
+# Setup for Latex rendering
+plt.rcParams.update(
+    {
+        "text.usetex": True,
+        "font.family": "serif",  # Use serif font
+        "font.serif": ["Computer Modern Roman"],  # Use Computer Modern Roman font
+    }
+)
+
 
 def smooth(scalars: list[float] | np.ndarray, weight: float) -> list[float]:
     """
@@ -25,24 +34,28 @@ def smooth(scalars: list[float] | np.ndarray, weight: float) -> list[float]:
     return smoothed
 
 
-def plot_tensorboard_logs(log_dirs, tags_to_plot, smooth_weight=0.5):
+def plot_tensorboard_logs(log_dirs, tags_to_plot, smooth_weight=0.5, show=True) -> None:
     """
     Plot TensorBoard logs for specified tags with optional smoothing.
 
     Args:
-    - log_dir (str): Path to the directory containing TensorBoard logs.
+    - log_dirs (str): Path to the directory containing TensorBoard logs.
     - tags_to_plot (list): List of tags to plot.
     - smooth_weight (int, optional): Window size for moving average smoothing. Default is 0.5.
+    - show (bool, optional): Whether to display the plot. Default is True.
 
     Returns:
     - None
     """
     # Determine colors for lines
-    colors = plt.cm.Set2(np.linspace(0, 1, len(tags_to_plot)))  # type: ignore
-    line_width = 0.6
+    # colors = plt.cm.tab10(np.linspace(0, 1, len(tags_to_plot)))  # type: ignore
+    cmap = plt.get_cmap("tab10")
+    colors = [cmap(int(i * 3.25 % 10)) for i in np.linspace(0, 1, len(tags_to_plot) * len(log_dirs))]
+
+    line_width = 0.5
 
-    for log_dir in log_dirs:
+    for j, log_dir in enumerate(log_dirs):
         # Load TensorBoard logs
         event_acc = EventAccumulator(log_dir)
         event_acc.Reload()
@@ -50,17 +63,33 @@ def plot_tensorboard_logs(log_dirs, tags_to_plot, smooth_weight=0.5):
         # Get all scalar events
         scalar_tags = event_acc.Tags()["scalars"]
 
+        graph_name = log_dir.split("/")[-1]
+        plot_name = ", ".join(tags_to_plot)
+
         # Load TensorBoard logs
         event_acc = EventAccumulator(log_dir)
         event_acc.Reload()
 
         # Plot specified tags
         for i, tag in enumerate(tags_to_plot):
+            color = colors[j * len(tags_to_plot) + i]
             if tag in scalar_tags:
                 events = event_acc.Scalars(tag)
                 steps = np.array([event.step for event in events])
                 values = np.array([event.value for event in events])
 
+                # Custom action space labels
+                def action_space(n: int):
+                    return r"$\mathcal{A}_" + f"{n}" + r"$"
+
+                label = graph_name.replace("_", ", ").replace("-", " ")
+                if "cart" in graph_name.lower():
+                    label = label.split(", ")[0] + f", {action_space(2)}"
+                elif "reg" in graph_name.lower():
+                    label = label.split(", ")[0] + f", {action_space(1)}"
+                elif "random" in graph_name.lower():
+                    label = label.split(", ")[0] + ", random policy"
+
                 if smooth_weight > 0:
                     # Apply moving average smoothing
                     smoothed_values = smooth(values, smooth_weight)
@@ -69,37 +98,62 @@ def plot_tensorboard_logs(log_dirs, tags_to_plot, smooth_weight=0.5):
                     plt.plot(
                         steps,
                         smoothed_values,
-                        label=tag + f" ({smooth_weight} smoothing)",
-                        color=colors[i],
+                        label=label + f" ({smooth_weight} EMA)",
+                        color=color,
                         linewidth=line_width,
                     )
                     # Plot original data with lower opacity using the same color
-                    plt.plot(steps, values, alpha=0.3, color=colors[i], label=tag, linewidth=line_width)
+                    plt.plot(steps, values, alpha=0.25, color=color, label=None, linewidth=line_width)
                 else:
-                    plt.plot(steps, values, color=colors[i], label=tag, linewidth=line_width)
+                    plt.plot(steps, values, color=color, label=label, linewidth=line_width)
             else:
                 print(f"Tag '{tag}' not found in TensorBoard logs.")
 
     plt.grid(True, alpha=0.1)
     plt.xlabel("Step")
-    plt.ylabel("Value")
-    plt.legend()
+    plt.ylabel(
+        tags_to_plot[0]
+        .split("/")[-1]
+        .replace("_", " ")
+        .replace("rew", "reward")
+        .replace("ep", "episode")
+        .replace("len", "length")
+        .title()
+    )
+    plt.legend(fontsize="small")
+    plot_name = "plot-" + "-".join(tags_to_plot).replace("/", "_") + ".pdf"
     plt.savefig(plot_name)
     print(f"Plot saved as '{plot_name}'")
-    plt.show()
+    if show:
+        plt.show()
+    plt.clf()
 
 
 def main():
     parser = argparse.ArgumentParser(description="Plot TensorBoard logs.")
     parser.add_argument("log_dirs", nargs="+", help="Path(s) to the directory containing TensorBoard logs")
-    parser.add_argument("-t", "--tags", nargs="+", help="Scalar tags to plot", required=True)
+    parser.add_argument("-t", "--tags", nargs="+", help="Scalar tags to plot")
     parser.add_argument("-s", "--smoothing", type=float, default=0, help="Window size for moving average smoothing")
-
+    parser.add_argument("-a", "--all", action="store_true", help="Plot all tags in the log directory")
     args = parser.parse_args()
 
-    plot_tensorboard_logs(args.log_dirs, args.tags, smooth_weight=args.smoothing)
+    if args.all:
+        tags_to_plot = [
+            "eval/success_rate",
+            "eval/mean_reward",
+            "eval/mean_ep_length",
+            "rollout/ep_rew_mean",
+            "rollout/ep_len_mean",
+        ]
+        for tag in tags_to_plot:
+            smoothing = 0
+            if tag.startswith("rollout"):
+                smoothing = args.smoothing or 0.5
+            plot_tensorboard_logs(args.log_dirs, [tag], smooth_weight=smoothing, show=False)
+    else:
+        plot_tensorboard_logs(args.log_dirs, args.tags, smooth_weight=args.smoothing)
 
 
 if __name__ == "__main__":

From c012f8b975acb5bfacc0483859b70e1acde34a8a Mon Sep 17 00:00:00 2001
From: Mads Risager
Date: Tue, 30 Apr 2024 14:43:24 +0200
Subject: [PATCH 44/44] Fix already defined

---
 src/fastfiz_env/wrappers/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/fastfiz_env/wrappers/utils.py b/src/fastfiz_env/wrappers/utils.py
index 0080d79..f518cf4 100644
--- a/src/fastfiz_env/wrappers/utils.py
+++ b/src/fastfiz_env/wrappers/utils.py
@@ -32,6 +32,6 @@ def sph2deg(r: float, el: float, az: float) -> tuple[float, float, float]:
     Returns:
         tuple[float, float, float]: A tuple containing radius (magnitude), elevation angle (theta, in degrees), and azimuth angle (phi, in degrees).
     """
-    el: float = np.rad2deg(el)
-    az: float = np.rad2deg(az)
-    return r, el, az
+    el_deg: float = np.rad2deg(el)
+    az_deg: float = np.rad2deg(az)
+    return r, el_deg, az_deg