diff --git a/fastfiz_env/__init__.py b/fastfiz_env/__init__.py index 907a494..586e6df 100644 --- a/fastfiz_env/__init__.py +++ b/fastfiz_env/__init__.py @@ -26,9 +26,9 @@ __version__ = "0.0.1" -from .make import make, make_wrapped_env, make_callable_wrapped_env -from .reward_functions import DefaultReward, RewardFunction, CombinedReward -from . import envs, utils, wrappers, reward_functions +from . import envs, reward_functions, utils, wrappers +from .make import make, make_callable_wrapped_env, make_wrapped_env +from .reward_functions import CombinedReward, DefaultReward, RewardFunction __all__ = [ "make", @@ -45,7 +45,6 @@ from gymnasium.envs.registration import register - register( id="FastFiz-v0", entry_point="fastfiz_env.envs:FastFiz", diff --git a/fastfiz_env/envs/fastfiz.py b/fastfiz_env/envs/fastfiz.py index 73bb4db..c3e47db 100644 --- a/fastfiz_env/envs/fastfiz.py +++ b/fastfiz_env/envs/fastfiz.py @@ -1,19 +1,20 @@ from logging import warn +from typing import Optional + import fastfiz as ff -import numpy as np import gymnasium as gym +import numpy as np from gymnasium import spaces from fastfiz_env.utils.fastfiz.fastfiz import num_balls_pocketed +from ..reward_functions import DefaultReward, RewardFunction from ..utils.fastfiz import ( create_random_table_state, get_ball_positions, normalize_ball_positions, ) -from .utils import game_won, terminal_state, possible_shot -from typing import Optional -from ..reward_functions import RewardFunction, DefaultReward +from .utils import game_won, possible_shot, terminal_state class FastFiz(gym.Env): diff --git a/fastfiz_env/envs/pockets_fastfiz.py b/fastfiz_env/envs/pockets_fastfiz.py index 3cc789a..38fd451 100644 --- a/fastfiz_env/envs/pockets_fastfiz.py +++ b/fastfiz_env/envs/pockets_fastfiz.py @@ -1,21 +1,22 @@ from logging import warn +from typing import Optional + import fastfiz as ff -import numpy as np import gymnasium as gym +import numpy as np from gymnasium import spaces +from ..reward_functions import DefaultReward, RewardFunction from ..utils.fastfiz import ( + ball_state_to_pocket, create_random_table_state, get_ball_positions, - normalize_ball_positions, - pocket_centers, - ball_state_to_pocket, get_pocket_center, + normalize_ball_positions, num_balls_pocketed, + pocket_centers, ) -from .utils import game_won, terminal_state, possible_shot -from typing import Optional -from ..reward_functions import RewardFunction, DefaultReward +from .utils import game_won, possible_shot, terminal_state class PocketsFastFiz(gym.Env): diff --git a/fastfiz_env/make.py b/fastfiz_env/make.py index 2f2f30d..2ce64fa 100644 --- a/fastfiz_env/make.py +++ b/fastfiz_env/make.py @@ -1,8 +1,9 @@ -from gymnasium.envs.registration import EnvSpec import gymnasium as gym +from gymnasium.envs.registration import EnvSpec from fastfiz_env.wrappers.action import ActionSpaces, FastFizActionWrapper -from .reward_functions import RewardFunction, DefaultReward + +from .reward_functions import DefaultReward, RewardFunction def make( diff --git a/fastfiz_env/reward_functions/__init__.py b/fastfiz_env/reward_functions/__init__.py index 25adc18..e442984 100644 --- a/fastfiz_env/reward_functions/__init__.py +++ b/fastfiz_env/reward_functions/__init__.py @@ -2,12 +2,12 @@ This module contains the reward functions used in the FastFiz environment. """ -from .reward_function import RewardFunction, Weight -from .combined_reward import CombinedReward +from . import common from .binary_reward import BinaryReward +from .combined_reward import CombinedReward from .default_reward import DefaultReward +from .reward_function import RewardFunction, Weight from .winning_reward import WinningReward -from . import common __all__ = [ "RewardFunction", diff --git a/fastfiz_env/reward_functions/binary_reward.py b/fastfiz_env/reward_functions/binary_reward.py index c6f127c..44e7807 100644 --- a/fastfiz_env/reward_functions/binary_reward.py +++ b/fastfiz_env/reward_functions/binary_reward.py @@ -1,9 +1,11 @@ from abc import ABC, abstractmethod from typing import Optional, Union + import fastfiz as ff -from .reward_function import RewardFunction, Weight import numpy as np +from .reward_function import RewardFunction, Weight + class BinaryReward(RewardFunction, ABC): def __init__( diff --git a/fastfiz_env/reward_functions/combined_reward.py b/fastfiz_env/reward_functions/combined_reward.py index 29e7140..006c623 100644 --- a/fastfiz_env/reward_functions/combined_reward.py +++ b/fastfiz_env/reward_functions/combined_reward.py @@ -1,9 +1,11 @@ from typing import Optional -from .reward_function import RewardFunction, Weight -from .binary_reward import BinaryReward + import fastfiz as ff import numpy as np +from .binary_reward import BinaryReward +from .reward_function import RewardFunction, Weight + class CombinedReward(RewardFunction): """ diff --git a/fastfiz_env/reward_functions/common/__init__.py b/fastfiz_env/reward_functions/common/__init__.py index 58446f4..01584a2 100644 --- a/fastfiz_env/reward_functions/common/__init__.py +++ b/fastfiz_env/reward_functions/common/__init__.py @@ -2,33 +2,31 @@ This module contains implementations of useful reward functions for the FastFiz environment. """ -from .step_pocketed_reward import StepPocketedReward -from .total_distance_reward import TotalDistanceReward -from .delta_best_total_distance_reward import DeltaBestTotalDistanceReward -from .cue_ball_pocketed_reward import CueBallPocketedReward +from .balls_not_moved_reward import BallsNotMovedReward +from .constant_reward import ConstantReward from .cue_ball_not_moved_reward import CueBallNotMovedReward +from .cue_ball_pocketed_reward import CueBallPocketedReward +from .delta_best_total_distance_reward import DeltaBestTotalDistanceReward +from .exponential_velocity_reward import ExponentialVelocityReward from .game_won_reward import GameWonReward from .impossible_shot_reward import ImpossibleShotReward -from .constant_reward import ConstantReward -from .balls_not_moved_reward import BallsNotMovedReward -from .velocity_reward import VelocityReward -from .exponential_velocity_reward import ExponentialVelocityReward from .step_no_balls_pocketed_reward import StepNoBallsPocketedReward - +from .step_pocketed_reward import StepPocketedReward +from .total_distance_reward import TotalDistanceReward +from .velocity_reward import VelocityReward from .weights import ( ConstantWeight, - NegativeConstantWeight, + ConstantWeightBalls, + ConstantWeightCurrentStep, ConstantWeightMaxSteps, - NegativeConstantWeightMaxSteps, ConstantWeightNumBalls, - NegativeConstantWeightNumBalls, - ConstantWeightBalls, + NegativeConstantWeight, NegativeConstantWeightBalls, - ConstantWeightCurrentStep, NegativeConstantWeightCurrentStep, + NegativeConstantWeightMaxSteps, + NegativeConstantWeightNumBalls, ) - __all__ = [ # Reward functions "StepPocketedReward", diff --git a/fastfiz_env/reward_functions/common/balls_not_moved_reward.py b/fastfiz_env/reward_functions/common/balls_not_moved_reward.py index cbb3696..f492088 100644 --- a/fastfiz_env/reward_functions/common/balls_not_moved_reward.py +++ b/fastfiz_env/reward_functions/common/balls_not_moved_reward.py @@ -1,5 +1,5 @@ -from .. import BinaryReward from ...utils.fastfiz import any_ball_has_moved, get_ball_positions +from .. import BinaryReward class BallsNotMovedReward(BinaryReward): diff --git a/fastfiz_env/reward_functions/common/constant_reward.py b/fastfiz_env/reward_functions/common/constant_reward.py index 11a3ece..c9c23ad 100644 --- a/fastfiz_env/reward_functions/common/constant_reward.py +++ b/fastfiz_env/reward_functions/common/constant_reward.py @@ -1,7 +1,8 @@ -from ..reward_function import RewardFunction import fastfiz as ff import numpy as np +from ..reward_function import RewardFunction + class ConstantReward(RewardFunction): """ diff --git a/fastfiz_env/reward_functions/common/cue_ball_not_moved_reward.py b/fastfiz_env/reward_functions/common/cue_ball_not_moved_reward.py index 71a5049..def554f 100644 --- a/fastfiz_env/reward_functions/common/cue_ball_not_moved_reward.py +++ b/fastfiz_env/reward_functions/common/cue_ball_not_moved_reward.py @@ -1,7 +1,8 @@ -from .. import BinaryReward import fastfiz as ff import numpy as np +from .. import BinaryReward + class CueBallNotMovedReward(BinaryReward): """ diff --git a/fastfiz_env/reward_functions/common/cue_ball_pocketed_reward.py b/fastfiz_env/reward_functions/common/cue_ball_pocketed_reward.py index bd3ec38..b0f2bd7 100644 --- a/fastfiz_env/reward_functions/common/cue_ball_pocketed_reward.py +++ b/fastfiz_env/reward_functions/common/cue_ball_pocketed_reward.py @@ -1,7 +1,8 @@ -from .. import BinaryReward import fastfiz as ff import numpy as np +from .. import BinaryReward + class CueBallPocketedReward(BinaryReward): """ diff --git a/fastfiz_env/reward_functions/common/delta_best_total_distance_reward.py b/fastfiz_env/reward_functions/common/delta_best_total_distance_reward.py index eead78a..b58d5ca 100644 --- a/fastfiz_env/reward_functions/common/delta_best_total_distance_reward.py +++ b/fastfiz_env/reward_functions/common/delta_best_total_distance_reward.py @@ -1,12 +1,13 @@ -from ..reward_function import RewardFunction +import fastfiz as ff +import numpy as np + from ...utils.fastfiz import ( distances_to_closest_pocket, get_ball_positions, num_balls_in_play, pocket_centers, ) -import fastfiz as ff -import numpy as np +from ..reward_function import RewardFunction class DeltaBestTotalDistanceReward(RewardFunction): diff --git a/fastfiz_env/reward_functions/common/exponential_velocity_reward.py b/fastfiz_env/reward_functions/common/exponential_velocity_reward.py index 1779cbe..7620b59 100644 --- a/fastfiz_env/reward_functions/common/exponential_velocity_reward.py +++ b/fastfiz_env/reward_functions/common/exponential_velocity_reward.py @@ -1,7 +1,8 @@ -from ..reward_function import RewardFunction import fastfiz as ff import numpy as np +from ..reward_function import RewardFunction + class ExponentialVelocityReward(RewardFunction): """ diff --git a/fastfiz_env/reward_functions/common/game_won_reward.py b/fastfiz_env/reward_functions/common/game_won_reward.py index 8f17519..6c1839f 100644 --- a/fastfiz_env/reward_functions/common/game_won_reward.py +++ b/fastfiz_env/reward_functions/common/game_won_reward.py @@ -1,7 +1,8 @@ -from .. import BinaryReward import fastfiz as ff import numpy as np +from .. import BinaryReward + class GameWonReward(BinaryReward): """ diff --git a/fastfiz_env/reward_functions/common/impossible_shot_reward.py b/fastfiz_env/reward_functions/common/impossible_shot_reward.py index d744eb5..a9c5ba9 100644 --- a/fastfiz_env/reward_functions/common/impossible_shot_reward.py +++ b/fastfiz_env/reward_functions/common/impossible_shot_reward.py @@ -1,7 +1,8 @@ -from .. import BinaryReward import fastfiz as ff import numpy as np +from .. import BinaryReward + class ImpossibleShotReward(BinaryReward): """ diff --git a/fastfiz_env/reward_functions/common/step_no_balls_pocketed_reward.py b/fastfiz_env/reward_functions/common/step_no_balls_pocketed_reward.py index 043fd81..273cb22 100644 --- a/fastfiz_env/reward_functions/common/step_no_balls_pocketed_reward.py +++ b/fastfiz_env/reward_functions/common/step_no_balls_pocketed_reward.py @@ -1,8 +1,9 @@ -from ...reward_functions import BinaryReward -from ...utils.fastfiz.fastfiz import num_balls_pocketed import fastfiz as ff import numpy as np +from ...reward_functions import BinaryReward +from ...utils.fastfiz.fastfiz import num_balls_pocketed + class StepNoBallsPocketedReward(BinaryReward): """ diff --git a/fastfiz_env/reward_functions/common/step_pocketed_reward.py b/fastfiz_env/reward_functions/common/step_pocketed_reward.py index a319202..a351069 100644 --- a/fastfiz_env/reward_functions/common/step_pocketed_reward.py +++ b/fastfiz_env/reward_functions/common/step_pocketed_reward.py @@ -1,8 +1,9 @@ -from ..reward_function import RewardFunction -from ...utils.fastfiz.fastfiz import num_balls_pocketed import fastfiz as ff import numpy as np +from ...utils.fastfiz.fastfiz import num_balls_pocketed +from ..reward_function import RewardFunction + class StepPocketedReward(RewardFunction): """ diff --git a/fastfiz_env/reward_functions/common/total_distance_reward.py b/fastfiz_env/reward_functions/common/total_distance_reward.py index 30e05ac..399bc64 100644 --- a/fastfiz_env/reward_functions/common/total_distance_reward.py +++ b/fastfiz_env/reward_functions/common/total_distance_reward.py @@ -1,12 +1,13 @@ +import fastfiz as ff import numpy as np -from ..reward_function import RewardFunction + from ...utils.fastfiz import ( distances_to_closest_pocket, get_ball_positions, num_balls_in_play, pocket_centers, ) -import fastfiz as ff +from ..reward_function import RewardFunction class TotalDistanceReward(RewardFunction): diff --git a/fastfiz_env/reward_functions/common/velocity_reward.py b/fastfiz_env/reward_functions/common/velocity_reward.py index 8ca998a..a0e01f1 100644 --- a/fastfiz_env/reward_functions/common/velocity_reward.py +++ b/fastfiz_env/reward_functions/common/velocity_reward.py @@ -1,7 +1,8 @@ -from ..reward_function import RewardFunction import fastfiz as ff import numpy as np +from ..reward_function import RewardFunction + class VelocityReward(RewardFunction): """ diff --git a/fastfiz_env/reward_functions/default_reward.py b/fastfiz_env/reward_functions/default_reward.py index b439514..32fbfb3 100644 --- a/fastfiz_env/reward_functions/default_reward.py +++ b/fastfiz_env/reward_functions/default_reward.py @@ -1,18 +1,17 @@ from .combined_reward import CombinedReward from .common import ( + # ExponentialVelocityReward, + BallsNotMovedReward, ConstantReward, - StepPocketedReward, - GameWonReward, - CueBallPocketedReward, - ConstantWeightBalls, - NegativeConstantWeightMaxSteps, ConstantWeight, + ConstantWeightBalls, + CueBallPocketedReward, + GameWonReward, NegativeConstantWeight, - # ExponentialVelocityReward, - BallsNotMovedReward, + NegativeConstantWeightMaxSteps, + StepPocketedReward, ) - rewards = [ GameWonReward(ConstantWeight), CueBallPocketedReward(NegativeConstantWeight), diff --git a/fastfiz_env/reward_functions/reward_function.py b/fastfiz_env/reward_functions/reward_function.py index 32545dd..682bda2 100644 --- a/fastfiz_env/reward_functions/reward_function.py +++ b/fastfiz_env/reward_functions/reward_function.py @@ -1,9 +1,10 @@ from abc import ABC, abstractmethod +from typing import Callable, Optional, TypeAlias, Union + import fastfiz as ff -from typing import Union, Callable, Optional, TypeAlias import numpy as np -from fastfiz_env.utils.fastfiz.fastfiz import num_balls_in_play +from fastfiz_env.utils.fastfiz.fastfiz import num_balls_in_play Weight: TypeAlias = Union[float, Callable[[int, int, Optional[int]], float]] """ diff --git a/fastfiz_env/reward_functions/winning_reward.py b/fastfiz_env/reward_functions/winning_reward.py index 34994c1..9b29699 100644 --- a/fastfiz_env/reward_functions/winning_reward.py +++ b/fastfiz_env/reward_functions/winning_reward.py @@ -1,15 +1,15 @@ from .combined_reward import CombinedReward from .common import ( ConstantReward, - StepPocketedReward, - GameWonReward, - CueBallPocketedReward, - ConstantWeightBalls, - NegativeConstantWeightMaxSteps, ConstantWeight, - NegativeConstantWeight, + ConstantWeightBalls, + CueBallPocketedReward, ExponentialVelocityReward, + GameWonReward, + NegativeConstantWeight, + NegativeConstantWeightMaxSteps, StepNoBallsPocketedReward, + StepPocketedReward, ) rewards = [ diff --git a/fastfiz_env/utils/fastfiz/__init__.py b/fastfiz_env/utils/fastfiz/__init__.py index e60602b..12c761d 100644 --- a/fastfiz_env/utils/fastfiz/__init__.py +++ b/fastfiz_env/utils/fastfiz/__init__.py @@ -30,18 +30,21 @@ from .fastfiz import ( + POCKETS, action_to_shot, any_ball_has_moved, ball_overlaps, + ball_state_to_pocket, create_random_table_state, create_table_state, distance_to_closest_pocket, distance_to_pocket, distance_to_pockets, distances_to_closest_pocket, - get_ball_positions_id, get_ball_positions, + get_ball_positions_id, get_ball_velocity, + get_pocket_center, is_pocketed_state, map_action_to_shot_params, normalize_ball_positions, @@ -54,7 +57,4 @@ shotparams_to_list, shotparams_to_string, table_state_to_string, - ball_state_to_pocket, - get_pocket_center, - POCKETS, ) diff --git a/fastfiz_env/utils/fastfiz/fastfiz.py b/fastfiz_env/utils/fastfiz/fastfiz.py index 8c65aeb..6a564e2 100644 --- a/fastfiz_env/utils/fastfiz/fastfiz.py +++ b/fastfiz_env/utils/fastfiz/fastfiz.py @@ -1,6 +1,7 @@ -import numpy as np from typing import Optional + import fastfiz as ff +import numpy as np from gymnasium import spaces POCKETS = [ diff --git a/fastfiz_env/wrappers/action.py b/fastfiz_env/wrappers/action.py index f28a147..86064ce 100644 --- a/fastfiz_env/wrappers/action.py +++ b/fastfiz_env/wrappers/action.py @@ -1,9 +1,10 @@ -from .utils import cart2sph, sph2deg -from gymnasium import ActionWrapper -from gymnasium import spaces -import numpy as np from enum import Enum +import numpy as np +from gymnasium import ActionWrapper, spaces + +from .utils import cart2sph, sph2deg + class ActionSpaces(Enum): VECTOR_2D = (0,) diff --git a/scripts/random_policy.py b/scripts/random_policy.py index 47d06f4..af7a297 100644 --- a/scripts/random_policy.py +++ b/scripts/random_policy.py @@ -1,13 +1,11 @@ -from torch.utils.tensorboard import SummaryWriter - +import numpy as np from stable_baselines3.common.env_util import make_vec_env +from torch.utils.tensorboard.writer import SummaryWriter + +import fastfiz_env from fastfiz_env.reward_functions.default_reward import DefaultReward from fastfiz_env.wrappers.action import ActionSpaces -import fastfiz_env - -# Create and wrap the environment -# env = DummyVecEnv([lambda: gym.make("CartPole-v1")]) env = fastfiz_env.make_callable_wrapped_env( "PocketsFastFiz-v0", max_episode_steps=20, @@ -22,9 +20,7 @@ eval_freq = 50_000 eval_episodes = 100 total_runs = (total_timesteps // eval_freq) * eval_episodes -# total_runs = 10000 -# Initialize the SummaryWriter writer = SummaryWriter(log_dir="logs/random_policy", comment="-random-policy") total_success = 0 @@ -33,18 +29,15 @@ for episode in range(total_runs): obs = env.reset() - # Your policy rollout code here done = False while not done: action = env.action_space.sample() # Random policy - obs, reward, done, info = env.step([action]) + obs, reward, done, info = env.step(np.array(action)) total_len += 1 total_reward += reward total_success += int(info[0]["is_success"]) - # Log episode reward - success_mean = total_success / total_runs episode_mean = total_len / total_runs @@ -58,6 +51,5 @@ writer.add_scalar("eval/mean_reward", rew_mean, episode) writer.add_scalar("eval/mean_ep_length", episode_mean, episode) -# Close the environment and SummaryWriter env.close() writer.close() diff --git a/setup.py b/setup.py index 383a3a2..ad966b0 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ import re -from setuptools import setup, find_packages + +from setuptools import find_packages, setup def get_version(): diff --git a/tests/envs/test_envs.py b/tests/envs/test_envs.py index 3f63d10..5c74e78 100644 --- a/tests/envs/test_envs.py +++ b/tests/envs/test_envs.py @@ -1,4 +1,5 @@ import unittest + from fastfiz_env.envs import FastFiz from fastfiz_env.reward_functions.common import ConstantReward from fastfiz_env.wrappers import TimeLimitInjectionWrapper diff --git a/tests/utils/test_fastfiz.py b/tests/utils/test_fastfiz.py index 0f63a3b..1f5de24 100644 --- a/tests/utils/test_fastfiz.py +++ b/tests/utils/test_fastfiz.py @@ -1,10 +1,12 @@ import unittest + import fastfiz as ff + from fastfiz_env.utils.fastfiz import ( - create_table_state, - num_balls_in_play, any_ball_has_moved, + create_table_state, get_ball_positions, + num_balls_in_play, ) diff --git a/tests/utils/test_reward_functions.py b/tests/utils/test_reward_functions.py index dabe1df..781e782 100644 --- a/tests/utils/test_reward_functions.py +++ b/tests/utils/test_reward_functions.py @@ -1,23 +1,25 @@ import unittest + import fastfiz as ff +import numpy as np + +from fastfiz_env.reward_functions import CombinedReward from fastfiz_env.reward_functions.common import ( ConstantReward, - StepPocketedReward, - GameWonReward, - CueBallPocketedReward, - CueBallNotMovedReward, - ImpossibleShotReward, - DeltaBestTotalDistanceReward, - TotalDistanceReward, ConstantWeightBalls, + ConstantWeightCurrentStep, ConstantWeightMaxSteps, - NegativeConstantWeightMaxSteps, ConstantWeightNumBalls, - ConstantWeightCurrentStep, + CueBallNotMovedReward, + CueBallPocketedReward, + DeltaBestTotalDistanceReward, + GameWonReward, + ImpossibleShotReward, + NegativeConstantWeightMaxSteps, + StepPocketedReward, + TotalDistanceReward, ) -from fastfiz_env.reward_functions import CombinedReward from fastfiz_env.utils.fastfiz import create_table_state -import numpy as np def weight_fn(num_balls: int, current_step: int, max_steps: int | None) -> float: diff --git a/tests/utils/test_wrappers.py b/tests/utils/test_wrappers.py index 8cf83a1..4bde46f 100644 --- a/tests/utils/test_wrappers.py +++ b/tests/utils/test_wrappers.py @@ -1,4 +1,5 @@ import unittest + from fastfiz_env.wrappers.utils import cart2sph, sph2deg