Merge pull request #126 from edbeeching/feature/experiment-name
Feature: experiment_dir + experiment_name
visuallization authored Jul 16, 2023
2 parents 85b787a + e377033 commit cfa756c
Showing 12 changed files with 62 additions and 38 deletions.
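
In practice, the new `--experiment_dir` and `--experiment_name` options group the TensorBoard logs per run across the supported trainers. A typical invocation after this commit might look like the sketch below (environment path and names are illustrative):

```bash
# Illustrative sketch: any of the example env binaries works here;
# logs would land under the chosen experiment_dir.
gdrl --env=gdrl --env_path=examples/godot_rl_JumperHard/bin/JumperHard.x86_64 \
     --experiment_dir=logs/sb3 --experiment_name=Experiment_01 --viz
```
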
2 changes: 1 addition & 1 deletion README.md
@@ -38,7 +38,7 @@ You may need to add run permissions on the game executable. `chmod +x exampl
3. Train and visualize

```bash
gdrl --env=gdrl --env_path=examples/godot_rl_JumperHard/bin/JumperHard.x86_64 --viz
gdrl --env=gdrl --env_path=examples/godot_rl_JumperHard/bin/JumperHard.x86_64 --experiment_name=Experiment_01 --viz
```

### In editor interactive training
2 changes: 1 addition & 1 deletion docs/ADV_RLLIB.md
@@ -22,7 +22,7 @@ chmod +x examples/godot_rl_<ENV_NAME>/bin/<ENV_NAME>.x86_64 # linux example
• Train a model from scratch:

```
gdrl --trainer=rllib --env=gdrl --env_path=examples/godot_rl_<ENV_NAME>/bin/<ENV_NAME>.x86_64 --speedup=8 --viz
gdrl --trainer=rllib --env=gdrl --env_path=examples/godot_rl_<ENV_NAME>/bin/<ENV_NAME>.x86_64 --speedup=8 --experiment_name=Experiment_01 --viz
```

By default, rllib will use the hyperparameters in the **ppo_test.yaml** file from the GitHub repo. You can either modify this file or create your own.
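
For instance, a run against a custom config could be started as sketched below (the yaml file name is illustrative; `--config_file` is the existing rllib-only option):

```bash
gdrl --trainer=rllib --env=gdrl --env_path=examples/godot_rl_<ENV_NAME>/bin/<ENV_NAME>.x86_64 \
     --config_file=my_ppo_config.yaml --experiment_name=Experiment_01
```
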
12 changes: 6 additions & 6 deletions docs/ADV_SAMPLE_FACTORY.md
@@ -45,7 +45,7 @@ chmod +x examples/godot_rl_<ENV_NAME>/bin/<ENV_NAME>.x86_64 # linux example
• Train a model from scratch:

```bash
gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_<ENV_NAME>/bin/<ENV_NAME>.x86_64 --num_workers=10 --experiment=BallChase --viz --speedup=8 --batched_sampling=True
gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_<ENV_NAME>/bin/<ENV_NAME>.x86_64 --num_workers=10 --experiment_name=BallChase --viz --speedup=8 --batched_sampling=True
```

• Download a pretrained checkpoint from the HF hub:
@@ -57,7 +57,7 @@ python -m sample_factory.huggingface.load_from_hub -r edbeeching/sample_factory_
• Visualize a trained model:

```bash
gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_<ENV_NAME>/bin/<ENV_NAME>.x86_64 --num_workers=1 --experiment=<ENV_NAME> --viz --eval --batched_sampling=True --speedup=8 --push_to_hub --hf_repository=<HF_USERNAME>/sample_factory_<ENV_NAME>
gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_<ENV_NAME>/bin/<ENV_NAME>.x86_64 --num_workers=1 --experiment_name=<ENV_NAME> --viz --eval --batched_sampling=True --speedup=8 --push_to_hub --hf_repository=<HF_USERNAME>/sample_factory_<ENV_NAME>
```

## Advanced Environment Usage
@@ -74,7 +74,7 @@ chmod +x examples/godot_rl_Racer/bin/Racer.x86_64 # linux example
• Train a model from scratch:

```bash
gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_Racer/bin/Racer.x86_64 --train_for_env_steps=10000000 --experiment=Racer --reward_scale=0.01 --worker_num_splits=2 --num_envs_per_worker=2 --num_workers=40 --speedup=8 --batched_sampling=True --batch_size=2048 --num_batches_per_epoch=2 --num_epochs=2 --learning_rate=0.0001 --exploration_loss_coef=0.0001 --lr_schedule=kl_adaptive_epoch --lr_schedule_kl_threshold=0.04 --use_rnn=True --recurrence=32
gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_Racer/bin/Racer.x86_64 --train_for_env_steps=10000000 --experiment_name=Racer --reward_scale=0.01 --worker_num_splits=2 --num_envs_per_worker=2 --num_workers=40 --speedup=8 --batched_sampling=True --batch_size=2048 --num_batches_per_epoch=2 --num_epochs=2 --learning_rate=0.0001 --exploration_loss_coef=0.0001 --lr_schedule=kl_adaptive_epoch --lr_schedule_kl_threshold=0.04 --use_rnn=True --recurrence=32
```

• Download a pretrained checkpoint from the HF hub:
@@ -86,7 +86,7 @@ python -m sample_factory.huggingface.load_from_hub -r edbeeching/sample_factory_
• Visualize a trained model:

```bash
gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_Racer/bin/Racer.x86_64 --num_workers=1 --experiment=Racer --viz --eval --batched_sampling=True --speedup=8 --push_to_hub --hf_repository=edbeeching/sample_factory_Racer
gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_Racer/bin/Racer.x86_64 --num_workers=1 --experiment_name=Racer --viz --eval --batched_sampling=True --speedup=8 --push_to_hub --hf_repository=edbeeching/sample_factory_Racer
```

### Usage instructions for env **MultiAgent FPS**
@@ -101,7 +101,7 @@ chmod +x examples/godot_rl_FPS/bin/FPS.x86_64 # linux example
• Train a model from scratch:

```bash
gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_FPS/bin/FPS.x86_64 --num_workers=10 --experiment=FPS --viz --batched_sampling=True --speedup=8 --num_workers=80 --batched_sampling=False --num_policies=4 --with_pbt=True --pbt_period_env_steps=1000000 --pbt_start_mutation=1000000 --batch_size=2048 --num_batches_per_epoch=2 --num_epochs=2 --learning_rate=0.00005 --exploration_loss_coef=0.001 --lr_schedule=kl_adaptive_epoch --lr_schedule_kl_threshold=0.08 --use_rnn=True --recurrence=32
gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_FPS/bin/FPS.x86_64 --num_workers=10 --experiment_name=FPS --viz --batched_sampling=True --speedup=8 --num_workers=80 --batched_sampling=False --num_policies=4 --with_pbt=True --pbt_period_env_steps=1000000 --pbt_start_mutation=1000000 --batch_size=2048 --num_batches_per_epoch=2 --num_epochs=2 --learning_rate=0.00005 --exploration_loss_coef=0.001 --lr_schedule=kl_adaptive_epoch --lr_schedule_kl_threshold=0.08 --use_rnn=True --recurrence=32
```

• Download a pretrained checkpoint from the HF hub:
@@ -113,7 +113,7 @@ python -m sample_factory.huggingface.load_from_hub -r edbeeching/sample_factory_
• Visualize a trained model:

```bash
gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_FPS/bin/FPS.x86_64 --num_workers=1 --experiment=FPS --viz --eval --batched_sampling=True --speedup=8 --push_to_hub --hf_repository=edbeeching/sample_factory_FPS
gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_FPS/bin/FPS.x86_64 --num_workers=1 --experiment_name=FPS --viz --eval --batched_sampling=True --speedup=8 --push_to_hub --hf_repository=edbeeching/sample_factory_FPS
```

## Training on a cluster
4 changes: 2 additions & 2 deletions docs/ADV_STABLE_BASELINES_3.md
@@ -39,14 +39,14 @@ chmod +x examples/godot_rl_<ENV_NAME>/bin/<ENV_NAME>.x86_64 # linux example
### Train a model from scratch:

```bash
gdrl --env=gdrl --env_path=examples/godot_rl_<ENV_NAME>/bin/<ENV_NAME>.x86_64 --viz
gdrl --env=gdrl --env_path=examples/godot_rl_<ENV_NAME>/bin/<ENV_NAME>.x86_64 --experiment_name=Experiment_01 --viz
```

While the default options for sb3 work reasonably well, you may be interested in changing the hyperparameters.

We recommend taking the [sb3 example](https://github.com/edbeeching/godot_rl_agents/blob/main/examples/stable_baselines3_example.py) and modifying it to match your needs.

This example exposes more parameters for the user to configure, such as `--speedup` to run the environment faster than realtime and `n_parallel` to launch several instances of the game executable in order to accelerate training (not available for in-editor training).
This example exposes more parameters for the user to configure, such as `--speedup` to run the environment faster than realtime and `--n_parallel` to launch several instances of the game executable in order to accelerate training (not available for in-editor training).


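As a rough sketch, a direct run of the example script with those options might look like the following (flag values are illustrative):

```bash
python examples/stable_baselines3_example.py \
    --env_path=examples/godot_rl_<ENV_NAME>/bin/<ENV_NAME>.x86_64 \
    --speedup=8 --n_parallel=4 \
    --experiment_dir=logs/sb3 --experiment_name=Experiment_01
```
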
8 changes: 4 additions & 4 deletions docs/EXAMPLE_ENVIRONMENTS.md
@@ -36,7 +36,7 @@ For the current version, we provide 4 example environments, located in **envs/ex
### Example training:
The agent can be trained with the following command:
```
gdrl --env_path envs/builds/JumperHard/jumper_hard.x86_64 --config_file envs/configs/ppo_config_jumper_hard.yaml
gdrl --env_path envs/builds/JumperHard/jumper_hard.x86_64 --config_file envs/configs/ppo_config_jumper_hard.yaml --experiment_name=Experiment_01
```
Training logs will be output by default to **/home/USER/ray_results/PPO/jumper_hard/**
You can monitor training curves etc with tensorboard
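For example (default path as above; replace USER with your username):
```
tensorboard --logdir /home/USER/ray_results/PPO/jumper_hard/
```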
@@ -75,7 +75,7 @@ gdrl --env_path envs/builds/JumperHard/jumper_hard.x86_64 --eval --restore envs/
### Example training:
The agent can be trained with the following command:
```
gdrl --env_path envs/builds/BallChase/ball_chase.x86_64 --config_file envs/configs/ppo_config_ball_chase.yaml
gdrl --env_path envs/builds/BallChase/ball_chase.x86_64 --config_file envs/configs/ppo_config_ball_chase.yaml --experiment_name=BallChase_01
```
Training logs will be output by default to **/home/USER/ray_results/PPO/ball_chase/**
You can monitor training curves etc with tensorboard
@@ -112,7 +112,7 @@ gdrl --env_path envs/builds/BallChase/ball_chase.x86_64 --eval --restore envs/ch
### Example training:
The agent can be trained with the following command:
```
gdrl --env_path envs/builds/FlyBy/fly_by.x86_64 --config_file envs/configs/ppo_config_fly_by.yaml
gdrl --env_path envs/builds/FlyBy/fly_by.x86_64 --config_file envs/configs/ppo_config_fly_by.yaml --experiment_name=FlyBy_01
```
Training logs will be output by default to **/home/USER/ray_results/PPO/fly_by/**
You can monitor training curves etc with tensorboard
@@ -153,7 +153,7 @@ gdrl --env_path envs/builds/FlyBy/fly_by.x86_64 --eval --restore envs/checkpoint
### Example training:
The agent can be trained with the following command:
```
gdrl --env_path envs/builds/SpaceShooter/space_shooter.x86_64 --config_file envs/configs/ppo_config_space_shooter.yaml
gdrl --env_path envs/builds/SpaceShooter/space_shooter.x86_64 --config_file envs/configs/ppo_config_space_shooter.yaml --experiment_name=Shooter_01
```
Training logs will be output by default to **/home/USER/ray_results/PPO/space_shooter/**
You can monitor training curves etc with tensorboard
10 changes: 6 additions & 4 deletions examples/clean_rl_example.py
@@ -17,8 +17,10 @@
def parse_args():
# fmt: off
parser = argparse.ArgumentParser()
parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
help="the name of this experiment")
parser.add_argument("--experiment_dir", default="logs/cleanrl", type=str,
help="The name of the the experiment directory, in which the tensorboard logs are getting stored")
parser.add_argument("--experiment_name", default=os.path.basename(__file__).rstrip(".py"), type=str,
help="The name of the the experiment, which will be displayed in tensborboard")
parser.add_argument("--seed", type=int, default=1,
help="seed of the experiment")
parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
@@ -124,7 +126,7 @@ def get_action_and_value(self, x, action=None):

if __name__ == "__main__":
args = parse_args()
run_name = f"{args.env_path}__{args.exp_name}__{args.seed}__{int(time.time())}"
run_name = f"{args.experiment_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb

@@ -137,7 +139,7 @@ def get_action_and_value(self, x, action=None):
# monitor_gym=True, no longer works for gymnasium
save_code=True,
)
writer = SummaryWriter(f"runs/{run_name}")
writer = SummaryWriter(f"{args.experiment_dir}/{run_name}")
writer.add_text(
"hyperparameters",
"|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])),
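
With the renamed arguments, a run of this example might be launched as sketched below (only the experiment-related flags are shown; the env path is illustrative and the remaining options keep their defaults):

```bash
python examples/clean_rl_example.py \
    --env_path=examples/godot_rl_<ENV_NAME>/bin/<ENV_NAME>.x86_64 \
    --experiment_dir=logs/cleanrl --experiment_name=CleanRL_01
```
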
16 changes: 14 additions & 2 deletions examples/stable_baselines3_example.py
@@ -17,6 +17,18 @@
type=str,
help="The Godot binary to use, do not include for in editor training",
)
parser.add_argument(
"--experiment_dir",
default="logs/sb3",
type=str,
help="The name of the the experiment directory, in which the tensorboard logs are getting stored",
)
parser.add_argument(
"--experiment_name",
default="Experiment",
type=str,
help="The name of the the experiment, which will be displayed in tensborboard",
)
parser.add_argument(
"--onnx_export_path",
default=None,
@@ -32,8 +44,8 @@
env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=True, n_parallel=args.n_parallel, speedup=args.speedup)
env = VecMonitor(env)

model = PPO("MultiInputPolicy", env, ent_coef=0.0001, verbose=2, n_steps=32, tensorboard_log="logs/log")
model.learn(1000000)
model = PPO("MultiInputPolicy", env, ent_coef=0.0001, verbose=2, n_steps=32, tensorboard_log=args.experiment_dir)
model.learn(1000000, tb_log_name=args.experiment_name)

print("closing env")
env.close()
16 changes: 4 additions & 12 deletions godot_rl/main.py
@@ -25,7 +25,6 @@
try:
from godot_rl.wrappers.ray_wrapper import rllib_training
except ImportError as e:
print("Warning: ", e)
def rllib_training(args, extras):
print("Import error when trying to use rllib. If you have not installed the package, try: pip install godot-rl[rllib]")
print("Otherwise try fixing the error above.")
@@ -34,7 +33,6 @@ def rllib_training(args, extras):
try:
from godot_rl.wrappers.stable_baselines_wrapper import stable_baselines_training
except ImportError as e:
print("Warning: ", e)
def stable_baselines_training(args, extras):
print(
"Import error when trying to use sb3. If you have not installed the package, try: pip install godot-rl[sb3]"
@@ -44,7 +42,6 @@ def stable_baselines_training(args, extras):
try:
from godot_rl.wrappers.sample_factory_wrapper import sample_factory_training, sample_factory_enjoy
except ImportError as e:
print("Warning: ", e)
def sample_factory_training(args, extras):
print(
"Import error when trying to use sample-factory If you have not installed the package, try: pip install godot-rl[sf]"
@@ -54,21 +51,16 @@ def sample_factory_training(args, extras):

def get_args():
parser = argparse.ArgumentParser(allow_abbrev=False)
parser.add_argument(
"--trainer",
default="sb3",
choices=["sb3", "sf", "rllib"],
type=str,
help="framework to use (rllib or stable-baselines)",
)
parser.add_argument("--trainer", default="sb3", choices=["sb3", "sf", "rllib"], type=str, help="framework to use (rllib, sf, sb3)")
parser.add_argument("--env_path", default=None, type=str, help="Godot binary to use")
parser.add_argument("--config_file", default="ppo_test.yaml", type=str, help="The yaml config file (used by rllib)")
parser.add_argument("--config_file", default="ppo_test.yaml", type=str, help="The yaml config file [only for rllib]")
parser.add_argument("--restore", default=None, type=str, help="the location of a checkpoint to restore from")
parser.add_argument("--eval", default=False, action="store_true", help="whether to eval the model")
parser.add_argument("--speedup", default=1, type=int, help="whether to speed up the physics in the env")
parser.add_argument("--export", default=False, action="store_true", help="wheter to export the model")
parser.add_argument("--num_gpus", default=None, type=int, help="Number of GPUs to use [only for rllib]")
parser.add_argument("--experiment_name", default=None, type=str, help="The name of the experiment [only for rllib]")
parser.add_argument("--experiment_dir", default=None, type=str, help="The name of the the experiment directory, in which the tensorboard logs are getting stored")
parser.add_argument("--experiment_name", default=None, type=str, help="The name of the the experiment, which will be displayed in tensborboard")
parser.add_argument("--viz", default=False, action="store_true", help="Whether to visualize one process")

return parser.parse_known_args()
1 change: 1 addition & 0 deletions godot_rl/wrappers/ray_wrapper.py
@@ -160,6 +160,7 @@ def rllib_training(args, extras):
checkpoint_freq=checkpoint_freq,
checkpoint_at_end=not args.eval,
restore=args.restore,
local_dir=args.experiment_dir or "logs/rllib",
trial_name_creator=lambda trial: f"{args.experiment_name}" if args.experiment_name else f"{trial.trainable_name}_{trial.trial_id}"
)
if args.export:
15 changes: 15 additions & 0 deletions godot_rl/wrappers/sample_factory_wrapper.py
@@ -164,13 +164,28 @@ def add_gdrl_env_args(_env, p: argparse.ArgumentParser, evaluation=False):
type=int,
help="Num agents in each envpool (if used)",
)
p.add_argument(
"--experiment_dir",
default="logs/sf",
type=str,
help="The name of the the experiment directory, in which the tensorboard logs are getting stored",
)
p.add_argument(
"--experiment_name",
default=None,
type=str,
help="The name of the the experiment, which will be displayed in tensborboard",
)


def parse_gdrl_args(argv=None, evaluation=False):
parser, partial_cfg = parse_sf_args(argv=argv, evaluation=evaluation)
add_gdrl_env_args(partial_cfg.env, parser, evaluation=evaluation)
gdrl_override_defaults(partial_cfg.env, parser)
final_cfg = parse_full_cfg(parser, argv)
args, _ = parser.parse_known_args()
final_cfg.train_dir = args.experiment_dir or "logs/sf"
final_cfg.experiment = args.experiment_name or final_cfg.experiment
return final_cfg


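With these overrides in place, a sample-factory run could redirect its logs, for example (values are illustrative):

```bash
gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_<ENV_NAME>/bin/<ENV_NAME>.x86_64 \
     --num_workers=10 --speedup=8 --experiment_dir=logs/sf --experiment_name=Experiment_01
```
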
6 changes: 4 additions & 2 deletions godot_rl/wrappers/stable_baselines_wrapper.py
@@ -2,6 +2,7 @@
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env.base_vec_env import VecEnv
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
from typing import Any, Dict, List, Optional, Tuple, Union

from godot_rl.core.godot_env import GodotEnv
@@ -128,6 +129,7 @@ def step_wait(self) -> Tuple[Dict[str, np.ndarray], np.ndarray, np.ndarray, List
def stable_baselines_training(args, extras, n_steps: int = 200000, **kwargs) -> None:
# Initialize the custom environment
env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=args.viz, speedup=args.speedup, **kwargs)
env = VecMonitor(env)

# Initialize the PPO model
model = PPO(
Expand All @@ -136,11 +138,11 @@ def stable_baselines_training(args, extras, n_steps: int = 200000, **kwargs) ->
ent_coef=0.0001,
verbose=2,
n_steps=32,
tensorboard_log="logs/log",
tensorboard_log=args.experiment_dir or "logs/sb3",
)

# Train the model
model.learn(n_steps)
model.learn(n_steps, tb_log_name=args.experiment_name)

print("closing env")
env.close()
8 changes: 4 additions & 4 deletions setup.cfg
@@ -50,7 +50,7 @@ sb3 =
huggingface_sb3

sf =
sample-factory
sample-factory==2.0.3
gym==0.26.2

rllib =
@@ -66,9 +66,9 @@ all =
numpy==1.23.5
gym==0.26.2
stable-baselines3==1.2.0
huggingface_sb3
sample-factory

sample-factory==2.0.3
ray==2.2.0
ray[rllib]

huggingface_sb3
tensorflow_probability
