diff --git a/README.md b/README.md
index 4547386b..56e88ef6 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@ You may need to example run permissions on the game executable. `chmod +x exampl
 3. Train and visualize
 
 ```bash
-gdrl --env=gdrl --env_path=examples/godot_rl_JumperHard/bin/JumperHard.x86_64 --viz
+gdrl --env=gdrl --env_path=examples/godot_rl_JumperHard/bin/JumperHard.x86_64 --experiment_name=Experiment_01 --viz
 ```
 
 ### In editor interactive training
diff --git a/docs/ADV_RLLIB.md b/docs/ADV_RLLIB.md
index 105a907d..9f3eba77 100644
--- a/docs/ADV_RLLIB.md
+++ b/docs/ADV_RLLIB.md
@@ -22,7 +22,7 @@ chmod +x examples/godot_rl_/bin/.x86_64 # linux example
 • Train a model from scratch:
 
 ```
-gdrl --trainer=rllib --env=gdrl --env_path=examples/godot_rl_/bin/.x86_64 --speedup=8 --viz
+gdrl --trainer=rllib --env=gdrl --env_path=examples/godot_rl_/bin/.x86_64 --speedup=8 --experiment_name=Experiment_01 --viz
 ```
 
 By default rllib will use the hyperparameters in the **ppo_test.yaml** file on the github repo. You can either modify this file, or create your own one.
diff --git a/docs/ADV_SAMPLE_FACTORY.md b/docs/ADV_SAMPLE_FACTORY.md
index eae23864..84347b70 100644
--- a/docs/ADV_SAMPLE_FACTORY.md
+++ b/docs/ADV_SAMPLE_FACTORY.md
@@ -45,7 +45,7 @@ chmod +x examples/godot_rl_/bin/.x86_64 # linux example
 • Train a model from scratch:
 
 ```bash
-gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_/bin/.x86_64 --num_workers=10 --experiment=BallChase --viz --speedup=8 --batched_sampling=True
+gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_/bin/.x86_64 --num_workers=10 --experiment_name=BallChase --viz --speedup=8 --batched_sampling=True
 ```
 
 • Download a pretrained checkpoint from the HF hub:
@@ -57,7 +57,7 @@ python -m sample_factory.huggingface.load_from_hub -r edbeeching/sample_factory_
 • Visualize a trained model:
 
 ```bash
-gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_/bin/.x86_64 --num_workers=1 --experiment= --viz --eval --batched_sampling=True --speedup=8 --push_to_hub --hf_repository=/sample_factory_
+gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_/bin/.x86_64 --num_workers=1 --experiment_name= --viz --eval --batched_sampling=True --speedup=8 --push_to_hub --hf_repository=/sample_factory_
 ```
 
 ## Advanced Environment Usage
@@ -74,7 +74,7 @@ chmod +x examples/godot_rl_Racer/bin/Racer.x86_64 # linux example
 • Train a model from scratch:
 
 ```bash
-gdrl--trainer=sf --env=gdrl --env_path=examples/godot_rl_Racer/bin/Racer.x86_64 --train_for_env_steps=10000000 --experiment=Racer --reward_scale=0.01 --worker_num_splits=2 --num_envs_per_worker=2 --num_workers=40 --speedup=8 --batched_sampling=True --batch_size=2048 --num_batches_per_epoch=2 --num_epochs=2 --learning_rate=0.0001 --exploration_loss_coef=0.0001 --lr_schedule=kl_adaptive_epoch --lr_schedule_kl_threshold=0.04 --use_rnn=True --recurrence=32
+gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_Racer/bin/Racer.x86_64 --train_for_env_steps=10000000 --experiment_name=Racer --reward_scale=0.01 --worker_num_splits=2 --num_envs_per_worker=2 --num_workers=40 --speedup=8 --batched_sampling=True --batch_size=2048 --num_batches_per_epoch=2 --num_epochs=2 --learning_rate=0.0001 --exploration_loss_coef=0.0001 --lr_schedule=kl_adaptive_epoch --lr_schedule_kl_threshold=0.04 --use_rnn=True --recurrence=32
 ```
 
 • Download a pretrained checkpoint from the HF hub:
@@ -86,7 +86,7 @@ python -m sample_factory.huggingface.load_from_hub -r edbeeching/sample_factory_
 • Visualize a trained model:
 
 ```bash
-gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_Racer/bin/Racer.x86_64 --num_workers=1 --experiment=Racer --viz --eval --batched_sampling=True --speedup=8 --push_to_hub --hf_repository=edbeeching/sample_factory_Racer
+gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_Racer/bin/Racer.x86_64 --num_workers=1 --experiment_name=Racer --viz --eval --batched_sampling=True --speedup=8 --push_to_hub --hf_repository=edbeeching/sample_factory_Racer
 ```
 
 ### Usage instructions for env **MultiAgent FPS**
@@ -101,7 +101,7 @@ chmod +x examples/godot_rl_FPS/bin/FPS.x86_64 # linux example
 • Train a model from scratch:
 
 ```bash
-gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_FPS/bin/FPS.x86_64 --num_workers=10 --experiment=FPS --viz --batched_sampling=True --speedup=8 --num_workers=80 --batched_sampling=False --num_policies=4 --with_pbt=True --pbt_period_env_steps=1000000 --pbt_start_mutation=1000000 --batch_size=2048 --num_batches_per_epoch=2 --num_epochs=2 --learning_rate=0.00005 --exploration_loss_coef=0.001 --lr_schedule=kl_adaptive_epoch --lr_schedule_kl_threshold=0.08 --use_rnn=True --recurrence=32
+gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_FPS/bin/FPS.x86_64 --num_workers=10 --experiment_name=FPS --viz --batched_sampling=True --speedup=8 --num_workers=80 --batched_sampling=False --num_policies=4 --with_pbt=True --pbt_period_env_steps=1000000 --pbt_start_mutation=1000000 --batch_size=2048 --num_batches_per_epoch=2 --num_epochs=2 --learning_rate=0.00005 --exploration_loss_coef=0.001 --lr_schedule=kl_adaptive_epoch --lr_schedule_kl_threshold=0.08 --use_rnn=True --recurrence=32
 ```
 
 • Download a pretrained checkpoint from the HF hub:
@@ -113,7 +113,7 @@ python -m sample_factory.huggingface.load_from_hub -r edbeeching/sample_factory_
 • Visualize a trained model:
 
 ```bash
-gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_FPS/bin/FPS.x86_64 --num_workers=1 --experiment=FPS --viz --eval --batched_sampling=True --speedup=8 --push_to_hub --hf_repository=edbeeching/sample_factory_FPS
+gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_FPS/bin/FPS.x86_64 --num_workers=1 --experiment_name=FPS --viz --eval --batched_sampling=True --speedup=8 --push_to_hub --hf_repository=edbeeching/sample_factory_FPS
 ```
 
 ## Training on a cluster
diff --git a/docs/ADV_STABLE_BASELINES_3.md b/docs/ADV_STABLE_BASELINES_3.md
index e2db8d38..c3d224bb 100644
--- a/docs/ADV_STABLE_BASELINES_3.md
+++ b/docs/ADV_STABLE_BASELINES_3.md
@@ -39,14 +39,14 @@ chmod +x examples/godot_rl_/bin/.x86_64 # linux example
 ### Train a model from scratch:
 
 ```bash
-gdrl --env=gdrl --env_path=examples/godot_rl_/bin/.x86_64 --viz
+gdrl --env=gdrl --env_path=examples/godot_rl_/bin/.x86_64 --experiment_name=Experiment_01 --viz
 ```
 
 While the default options for sb3 work reasonably well. You may be interested in changing the hyperparameters.
 
 We recommend taking the [sb3 example](https://github.com/edbeeching/godot_rl_agents/blob/main/examples/stable_baselines3_example.py) and modifying to match your needs.
 
-This example exposes more parameter for the user to configure, such as `--speedup` to run the environment faster than realtime and the `n_parallel` to launch several instances of the game executable in order to accelerate training (not available for in-editor training).
+This example exposes more parameters for the user to configure, such as `--speedup` to run the environment faster than realtime and `--n_parallel` to launch several instances of the game executable in order to accelerate training (not available for in-editor training).
 
 ```python
diff --git a/docs/EXAMPLE_ENVIRONMENTS.md b/docs/EXAMPLE_ENVIRONMENTS.md
index 4dfb3255..0ca6551a 100644
--- a/docs/EXAMPLE_ENVIRONMENTS.md
+++ b/docs/EXAMPLE_ENVIRONMENTS.md
@@ -36,7 +36,7 @@ For the current version, we provide 4 example environments, located in **envs/ex
 ### Example training:
 The agent can be trained with the following command:
 ```
-gdrl --env_path envs/builds/JumperHard/jumper_hard.x86_64 --config_file envs/configs/ppo_config_jumper_hard.yaml
+gdrl --env_path envs/builds/JumperHard/jumper_hard.x86_64 --config_file envs/configs/ppo_config_jumper_hard.yaml --experiment_name=Experiment_01
 ```
 Training logs will be output by default to **/home/USER/ray_results/PPO/jumper_hard/**
 You can monitor training curves etc with tensorboard
@@ -75,7 +75,7 @@ gdrl --env_path envs/builds/JumperHard/jumper_hard.x86_64 --eval --restore envs/
 ### Example training:
 The agent can be trained with the following command:
 ```
-gdrl --env_path envs/builds/BallChase/ball_chase.x86_64 --config_file envs/configs/ppo_config_ball_chase.yaml
+gdrl --env_path envs/builds/BallChase/ball_chase.x86_64 --config_file envs/configs/ppo_config_ball_chase.yaml --experiment_name=BallChase_01
 ```
 Training logs will be output by default to **/home/USER/ray_results/PPO/ball_chase/**
 You can monitor training curves etc with tensorboard
@@ -112,7 +112,7 @@ gdrl --env_path envs/builds/BallChase/ball_chase.x86_64 --eval --restore envs/ch
 ### Example training:
 The agent can be trained with the following command:
 ```
-gdrl --env_path envs/builds/FlyBy/fly_by.x86_64 --config_file envs/configs/ppo_config_fly_by.yaml
+gdrl --env_path envs/builds/FlyBy/fly_by.x86_64 --config_file envs/configs/ppo_config_fly_by.yaml --experiment_name=FlyBy_01
 ```
 Training logs will be output by default to **/home/USER/ray_results/PPO/fly_by/**
 You can monitor training curves etc with tensorboard
@@ -153,7 +153,7 @@ gdrl --env_path envs/builds/FlyBy/fly_by.x86_64 --eval --restore envs/checkpoint
 ### Example training:
 The agent can be trained with the following command:
 ```
-gdrl --env_path envs/builds/SpaceShooter/space_shooter.x86_64 --config_file envs/configs/ppo_config_space_shooter.yaml
+gdrl --env_path envs/builds/SpaceShooter/space_shooter.x86_64 --config_file envs/configs/ppo_config_space_shooter.yaml --experiment_name=Shooter_01
 ```
 Training logs will be output by default to **/home/USER/ray_results/PPO/space_shooter/**
 You can monitor training curves etc with tensorboard
diff --git a/examples/clean_rl_example.py b/examples/clean_rl_example.py
index 6420e368..0136c91d 100644
--- a/examples/clean_rl_example.py
+++ b/examples/clean_rl_example.py
@@ -17,8 +17,10 @@
 def parse_args():
     # fmt: off
     parser = argparse.ArgumentParser()
-    parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
-        help="the name of this experiment")
+    parser.add_argument("--experiment_dir", default="logs/cleanrl", type=str,
+        help="The name of the experiment directory, in which the tensorboard logs are stored")
+    parser.add_argument("--experiment_name", default=os.path.basename(__file__).rstrip(".py"), type=str,
+        help="The name of the experiment, which will be displayed in tensorboard")
     parser.add_argument("--seed", type=int, default=1,
         help="seed of the experiment")
     parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
@@ -124,7 +126,7 @@ def get_action_and_value(self, x, action=None):
 
 if __name__ == "__main__":
     args = parse_args()
-    run_name = f"{args.env_path}__{args.exp_name}__{args.seed}__{int(time.time())}"
+    run_name = f"{args.experiment_name}__{args.seed}__{int(time.time())}"
 
     if args.track:
         import wandb
@@ -137,7 +139,7 @@ def get_action_and_value(self, x, action=None):
             # monitor_gym=True, no longer works for gymnasium
             save_code=True,
         )
-    writer = SummaryWriter(f"runs/{run_name}")
+    writer = SummaryWriter(f"{args.experiment_dir}/{run_name}")
     writer.add_text(
         "hyperparameters",
         "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])),
diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py
index 29e92e1a..92b043e1 100644
--- a/examples/stable_baselines3_example.py
+++ b/examples/stable_baselines3_example.py
@@ -17,6 +17,18 @@
     type=str,
     help="The Godot binary to use, do not include for in editor training",
 )
+parser.add_argument(
+    "--experiment_dir",
+    default="logs/sb3",
+    type=str,
+    help="The name of the experiment directory, in which the tensorboard logs are stored",
+)
+parser.add_argument(
+    "--experiment_name",
+    default="Experiment",
+    type=str,
+    help="The name of the experiment, which will be displayed in tensorboard",
+)
 parser.add_argument(
     "--onnx_export_path",
     default=None,
@@ -32,8 +44,8 @@
 env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=True, n_parallel=args.n_parallel, speedup=args.speedup)
 env = VecMonitor(env)
 
-model = PPO("MultiInputPolicy", env, ent_coef=0.0001, verbose=2, n_steps=32, tensorboard_log="logs/log")
-model.learn(1000000)
+model = PPO("MultiInputPolicy", env, ent_coef=0.0001, verbose=2, n_steps=32, tensorboard_log=args.experiment_dir)
+model.learn(1000000, tb_log_name=args.experiment_name)
 
 print("closing env")
 env.close()
diff --git a/godot_rl/main.py b/godot_rl/main.py
index 6fe6e6fc..329049a7 100644
--- a/godot_rl/main.py
+++ b/godot_rl/main.py
@@ -25,7 +25,6 @@
 try:
     from godot_rl.wrappers.ray_wrapper import rllib_training
 except ImportError as e:
-    print("Warning: ", e)
     def rllib_training(args, extras):
         print("Import error when trying to use rllib. If you have not installed the package, try: pip install godot-rl[rllib]")
         print("Otherwise try fixing the error above.")
@@ -34,7 +33,6 @@ def rllib_training(args, extras):
 try:
     from godot_rl.wrappers.stable_baselines_wrapper import stable_baselines_training
 except ImportError as e:
-    print("Warning: ", e)
     def stable_baselines_training(args, extras):
         print(
             "Import error when trying to use sb3. If you have not installed the package, try: pip install godot-rl[sb3]"
@@ -44,7 +42,6 @@ def stable_baselines_training(args, extras):
 try:
     from godot_rl.wrappers.sample_factory_wrapper import sample_factory_training, sample_factory_enjoy
 except ImportError as e:
-    print("Warning: ", e)
     def sample_factory_training(args, extras):
         print(
             "Import error when trying to use sample-factory If you have not installed the package, try: pip install godot-rl[sf]"
@@ -54,21 +51,16 @@ def sample_factory_training(args, extras):
 
 def get_args():
     parser = argparse.ArgumentParser(allow_abbrev=False)
-    parser.add_argument(
-        "--trainer",
-        default="sb3",
-        choices=["sb3", "sf", "rllib"],
-        type=str,
-        help="framework to use (rllib or stable-baselines)",
-    )
+    parser.add_argument("--trainer", default="sb3", choices=["sb3", "sf", "rllib"], type=str, help="framework to use (rllib, sf, sb3)")
     parser.add_argument("--env_path", default=None, type=str, help="Godot binary to use")
-    parser.add_argument("--config_file", default="ppo_test.yaml", type=str, help="The yaml config file (used by rllib)")
+    parser.add_argument("--config_file", default="ppo_test.yaml", type=str, help="The yaml config file [only for rllib]")
     parser.add_argument("--restore", default=None, type=str, help="the location of a checkpoint to restore from")
     parser.add_argument("--eval", default=False, action="store_true", help="whether to eval the model")
     parser.add_argument("--speedup", default=1, type=int, help="whether to speed up the physics in the env")
     parser.add_argument("--export", default=False, action="store_true", help="wheter to export the model")
     parser.add_argument("--num_gpus", default=None, type=int, help="Number of GPUs to use [only for rllib]")
-    parser.add_argument("--experiment_name", default=None, type=str, help="The name of the experiment [only for rllib]")
+    parser.add_argument("--experiment_dir", default=None, type=str, help="The name of the experiment directory, in which the tensorboard logs are stored")
+    parser.add_argument("--experiment_name", default=None, type=str, help="The name of the experiment, which will be displayed in tensorboard")
     parser.add_argument("--viz", default=False, action="store_true", help="Whether to visualize one process")
 
     return parser.parse_known_args()
diff --git a/godot_rl/wrappers/ray_wrapper.py b/godot_rl/wrappers/ray_wrapper.py
index e8605f03..1eb5b135 100644
--- a/godot_rl/wrappers/ray_wrapper.py
+++ b/godot_rl/wrappers/ray_wrapper.py
@@ -160,6 +160,7 @@ def rllib_training(args, extras):
         checkpoint_freq=checkpoint_freq,
         checkpoint_at_end=not args.eval,
         restore=args.restore,
+        local_dir=args.experiment_dir or "logs/rllib",
         trial_name_creator=lambda trial: f"{args.experiment_name}" if args.experiment_name else f"{trial.trainable_name}_{trial.trial_id}"
     )
     if args.export:
diff --git a/godot_rl/wrappers/sample_factory_wrapper.py b/godot_rl/wrappers/sample_factory_wrapper.py
index d7bd928b..e81cccaf 100644
--- a/godot_rl/wrappers/sample_factory_wrapper.py
+++ b/godot_rl/wrappers/sample_factory_wrapper.py
@@ -164,6 +164,18 @@ def add_gdrl_env_args(_env, p: argparse.ArgumentParser, evaluation=False):
         type=int,
         help="Num agents in each envpool (if used)",
     )
+    p.add_argument(
+        "--experiment_dir",
+        default="logs/sf",
+        type=str,
+        help="The name of the experiment directory, in which the tensorboard logs are stored",
+    )
+    p.add_argument(
+        "--experiment_name",
+        default=None,
+        type=str,
+        help="The name of the experiment, which will be displayed in tensorboard",
+    )
 
 
 def parse_gdrl_args(argv=None, evaluation=False):
@@ -171,6 +183,9 @@ def parse_gdrl_args(argv=None, evaluation=False):
     add_gdrl_env_args(partial_cfg.env, parser, evaluation=evaluation)
     gdrl_override_defaults(partial_cfg.env, parser)
     final_cfg = parse_full_cfg(parser, argv)
+    args, _ = parser.parse_known_args()
+    final_cfg.train_dir = args.experiment_dir or "logs/sf"
+    final_cfg.experiment = args.experiment_name or final_cfg.experiment
     return final_cfg
diff --git a/godot_rl/wrappers/stable_baselines_wrapper.py b/godot_rl/wrappers/stable_baselines_wrapper.py
index b6348575..2134c6bc 100644
--- a/godot_rl/wrappers/stable_baselines_wrapper.py
+++ b/godot_rl/wrappers/stable_baselines_wrapper.py
@@ -2,6 +2,7 @@
 import numpy as np
 from stable_baselines3 import PPO
 from stable_baselines3.common.vec_env.base_vec_env import VecEnv
+from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 from godot_rl.core.godot_env import GodotEnv
@@ -128,6 +129,7 @@ def step_wait(self) -> Tuple[Dict[str, np.ndarray], np.ndarray, np.ndarray, List
 def stable_baselines_training(args, extras, n_steps: int = 200000, **kwargs) -> None:
     # Initialize the custom environment
     env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=args.viz, speedup=args.speedup, **kwargs)
+    env = VecMonitor(env)
 
     # Initialize the PPO model
     model = PPO(
@@ -136,11 +138,11 @@ def stable_baselines_training(args, extras, n_steps: int = 200000, **kwargs) ->
         ent_coef=0.0001,
         verbose=2,
         n_steps=32,
-        tensorboard_log="logs/log",
+        tensorboard_log=args.experiment_dir or "logs/sb3",
     )
 
     # Train the model
-    model.learn(n_steps)
+    model.learn(n_steps, tb_log_name=args.experiment_name)
 
     print("closing env")
     env.close()
diff --git a/setup.cfg b/setup.cfg
index 482f08be..e3c54c2b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -50,7 +50,7 @@ sb3 =
     huggingface_sb3
 
 sf =
-    sample-factory
+    sample-factory==2.0.3
     gym==0.26.2
 
 rllib =
@@ -66,9 +66,9 @@ all =
    numpy==1.23.5
    gym==0.26.2
    stable-baselines3==1.2.0
-   huggingface_sb3
-   sample-factory
-
+   sample-factory==2.0.3
    ray==2.2.0
    ray[rllib]
+
+   huggingface_sb3
    tensorflow_probability
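For reference, a minimal usage sketch (not part of the diff) of how the `--experiment_dir` and `--experiment_name` flags introduced above combine with the default sb3 trainer. The binary path is the JumperHard example from the README, and `logs/sb3` mirrors the wrapper's fallback directory; substitute your own values.

```bash
# Train with the default sb3 trainer, writing TensorBoard logs under logs/sb3
gdrl --env=gdrl --env_path=examples/godot_rl_JumperHard/bin/JumperHard.x86_64 \
     --experiment_dir=logs/sb3 --experiment_name=Experiment_01 --viz

# Inspect the training curves for that experiment
tensorboard --logdir logs/sb3
```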