From 69ce980e802a7782ccede0a34b703079fbd38516 Mon Sep 17 00:00:00 2001
From: visuallization
Date: Sat, 15 Jul 2023 12:04:18 +0200
Subject: [PATCH 1/3] feat(experiment_name): set experiment_name also in sb3 and sf + added reward tracking in sb3 wrapper

---
 docs/ADV_STABLE_BASELINES_3.md                |  2 +-
 examples/stable_baselines3_example.py         | 16 ++++++++++++++--
 godot_rl/main.py                              |  6 +++---
 godot_rl/wrappers/sample_factory_wrapper.py   |  1 +
 godot_rl/wrappers/stable_baselines_wrapper.py |  4 +++-
 5 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/docs/ADV_STABLE_BASELINES_3.md b/docs/ADV_STABLE_BASELINES_3.md
index e2db8d38..8ce36410 100644
--- a/docs/ADV_STABLE_BASELINES_3.md
+++ b/docs/ADV_STABLE_BASELINES_3.md
@@ -46,7 +46,7 @@ While the default options for sb3 work reasonably well. You may be interested in
 
 We recommend taking the [sb3 example](https://github.com/edbeeching/godot_rl_agents/blob/main/examples/stable_baselines3_example.py) and modifying to match your needs.
 
-This example exposes more parameter for the user to configure, such as `--speedup` to run the environment faster than realtime and the `n_parallel` to launch several instances of the game executable in order to accelerate training (not available for in-editor training).
+This example exposes more parameters for the user to configure, such as `--speedup` to run the environment faster than realtime and `--n_parallel` to launch several instances of the game executable in order to accelerate training (not available for in-editor training).
 
 ```python
 
diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py
index 29e92e1a..9e2921b0 100644
--- a/examples/stable_baselines3_example.py
+++ b/examples/stable_baselines3_example.py
@@ -17,6 +17,18 @@
     type=str,
     help="The Godot binary to use, do not include for in editor training",
 )
+parser.add_argument(
+    "--experiment_dir",
+    default="logs/log",
+    type=str,
+    help="The name of the experiment directory, in which the tensorboard logs are stored",
+)
+parser.add_argument(
+    "--experiment_name",
+    default="Experiment",
+    type=str,
+    help="The name of the experiment, which will be displayed in tensorboard",
+)
 parser.add_argument(
     "--onnx_export_path",
     default=None,
@@ -32,8 +44,8 @@
 env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=True, n_parallel=args.n_parallel, speedup=args.speedup)
 env = VecMonitor(env)
 
-model = PPO("MultiInputPolicy", env, ent_coef=0.0001, verbose=2, n_steps=32, tensorboard_log="logs/log")
-model.learn(1000000)
+model = PPO("MultiInputPolicy", env, ent_coef=0.0001, verbose=2, n_steps=32, tensorboard_log=args.experiment_dir)
+model.learn(1000000, tb_log_name=args.experiment_name)
 
 print("closing env")
 env.close()
diff --git a/godot_rl/main.py b/godot_rl/main.py
index 6fe6e6fc..90a76355 100644
--- a/godot_rl/main.py
+++ b/godot_rl/main.py
@@ -59,16 +59,16 @@ def get_args():
         default="sb3",
         choices=["sb3", "sf", "rllib"],
         type=str,
-        help="framework to use (rllib or stable-baselines)",
+        help="framework to use (rllib, sf, sb3)",
     )
     parser.add_argument("--env_path", default=None, type=str, help="Godot binary to use")
-    parser.add_argument("--config_file", default="ppo_test.yaml", type=str, help="The yaml config file (used by rllib)")
+    parser.add_argument("--config_file", default="ppo_test.yaml", type=str, help="The yaml config file [only for rllib]")
     parser.add_argument("--restore", default=None, type=str, help="the location of a checkpoint to restore from")
     parser.add_argument("--eval", default=False, action="store_true", help="whether to eval the model")
     parser.add_argument("--speedup", default=1, type=int, help="whether to speed up the physics in the env")
     parser.add_argument("--export", default=False, action="store_true", help="wheter to export the model")
     parser.add_argument("--num_gpus", default=None, type=int, help="Number of GPUs to use [only for rllib]")
-    parser.add_argument("--experiment_name", default=None, type=str, help="The name of the experiment [only for rllib]")
+    parser.add_argument("--experiment_name", default=None, type=str, help="The name of the experiment, which will be displayed in tensorboard")
     parser.add_argument("--viz", default=False, action="store_true", help="Whether to visualize one process")
 
     return parser.parse_known_args()
diff --git a/godot_rl/wrappers/sample_factory_wrapper.py b/godot_rl/wrappers/sample_factory_wrapper.py
index d7bd928b..5674e85d 100644
--- a/godot_rl/wrappers/sample_factory_wrapper.py
+++ b/godot_rl/wrappers/sample_factory_wrapper.py
@@ -171,6 +171,7 @@ def parse_gdrl_args(argv=None, evaluation=False):
     add_gdrl_env_args(partial_cfg.env, parser, evaluation=evaluation)
     gdrl_override_defaults(partial_cfg.env, parser)
     final_cfg = parse_full_cfg(parser, argv)
+    final_cfg.experiment = final_cfg.experiment_name if final_cfg.experiment_name != None else final_cfg.experiment
     return final_cfg
diff --git a/godot_rl/wrappers/stable_baselines_wrapper.py b/godot_rl/wrappers/stable_baselines_wrapper.py
index b6348575..9165cd6a 100644
--- a/godot_rl/wrappers/stable_baselines_wrapper.py
+++ b/godot_rl/wrappers/stable_baselines_wrapper.py
@@ -2,6 +2,7 @@
 import numpy as np
 from stable_baselines3 import PPO
 from stable_baselines3.common.vec_env.base_vec_env import VecEnv
+from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 from godot_rl.core.godot_env import GodotEnv
@@ -128,6 +129,7 @@ def step_wait(self) -> Tuple[Dict[str, np.ndarray], np.ndarray, np.ndarray, List
 def stable_baselines_training(args, extras, n_steps: int = 200000, **kwargs) -> None:
     # Initialize the custom environment
     env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=args.viz, speedup=args.speedup, **kwargs)
+    env = VecMonitor(env)
 
     # Initialize the PPO model
     model = PPO(
@@ -140,7 +142,7 @@ def stable_baselines_training(args, extras, n_steps: int = 200000, **kwargs) ->
     )
 
     # Train the model
-    model.learn(n_steps)
+    model.learn(n_steps, tb_log_name=args.experiment_name)
 
     print("closing env")
     env.close()

From b8625a3b2d2fb29b47b83ae855b27b45e40c05cd Mon Sep 17 00:00:00 2001
From: visuallization
Date: Sat, 15 Jul 2023 13:11:07 +0200
Subject: [PATCH 2/3] feat(experiment dir): added option to set experiment dir and unify names and locations

---
 examples/clean_rl_example.py                  | 10 ++++++----
 examples/stable_baselines3_example.py         |  2 +-
 godot_rl/main.py                              |  9 ++-------
 godot_rl/wrappers/ray_wrapper.py              |  1 +
 godot_rl/wrappers/sample_factory_wrapper.py   | 15 ++++++++++++++-
 godot_rl/wrappers/stable_baselines_wrapper.py |  2 +-
 6 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/examples/clean_rl_example.py b/examples/clean_rl_example.py
index 6420e368..b2dbc530 100644
--- a/examples/clean_rl_example.py
+++ b/examples/clean_rl_example.py
@@ -17,8 +17,10 @@ def parse_args():
     # fmt: off
     parser = argparse.ArgumentParser()
-    parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
-        help="the name of this experiment")
+    parser.add_argument("--experiment_dir", default="logs_cleanrl", type=str,
+        help="The name of the experiment directory, in which the tensorboard logs are stored")
+    parser.add_argument("--experiment_name", default=os.path.basename(__file__).rstrip(".py"), type=str,
+        help="The name of the experiment, which will be displayed in tensorboard")
     parser.add_argument("--seed", type=int, default=1,
         help="seed of the experiment")
     parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
@@ -124,7 +126,7 @@ def get_action_and_value(self, x, action=None):
 
 if __name__ == "__main__":
     args = parse_args()
-    run_name = f"{args.env_path}__{args.exp_name}__{args.seed}__{int(time.time())}"
+    run_name = f"{args.env_path}__{args.experiment_name}__{args.seed}__{int(time.time())}"
     if args.track:
         import wandb
 
@@ -137,7 +139,7 @@ def get_action_and_value(self, x, action=None):
             # monitor_gym=True, no longer works for gymnasium
             save_code=True,
         )
-    writer = SummaryWriter(f"runs/{run_name}")
+    writer = SummaryWriter(f"{args.experiment_dir}/{run_name}")
     writer.add_text(
         "hyperparameters",
         "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])),
diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py
index 9e2921b0..342f4d83 100644
--- a/examples/stable_baselines3_example.py
+++ b/examples/stable_baselines3_example.py
@@ -19,7 +19,7 @@
 )
 parser.add_argument(
     "--experiment_dir",
-    default="logs/log",
+    default="logs_sb3",
     type=str,
     help="The name of the experiment directory, in which the tensorboard logs are stored",
 )
diff --git a/godot_rl/main.py b/godot_rl/main.py
index 90a76355..312c29b6 100644
--- a/godot_rl/main.py
+++ b/godot_rl/main.py
@@ -54,13 +54,7 @@ def sample_factory_training(args, extras):
 
 def get_args():
     parser = argparse.ArgumentParser(allow_abbrev=False)
-    parser.add_argument(
-        "--trainer",
-        default="sb3",
-        choices=["sb3", "sf", "rllib"],
-        type=str,
-        help="framework to use (rllib, sf, sb3)",
-    )
+    parser.add_argument("--trainer", default="sb3", choices=["sb3", "sf", "rllib"], type=str, help="framework to use (rllib, sf, sb3)")
     parser.add_argument("--env_path", default=None, type=str, help="Godot binary to use")
     parser.add_argument("--config_file", default="ppo_test.yaml", type=str, help="The yaml config file [only for rllib]")
     parser.add_argument("--restore", default=None, type=str, help="the location of a checkpoint to restore from")
@@ -68,6 +62,7 @@ def get_args():
     parser.add_argument("--speedup", default=1, type=int, help="whether to speed up the physics in the env")
     parser.add_argument("--export", default=False, action="store_true", help="wheter to export the model")
     parser.add_argument("--num_gpus", default=None, type=int, help="Number of GPUs to use [only for rllib]")
+    parser.add_argument("--experiment_dir", default=None, type=str, help="The name of the experiment directory, in which the tensorboard logs are stored")
     parser.add_argument("--experiment_name", default=None, type=str, help="The name of the experiment, which will be displayed in tensorboard")
     parser.add_argument("--viz", default=False, action="store_true", help="Whether to visualize one process")
 
diff --git a/godot_rl/wrappers/ray_wrapper.py b/godot_rl/wrappers/ray_wrapper.py
index e8605f03..6d9fbf50 100644
--- a/godot_rl/wrappers/ray_wrapper.py
+++ b/godot_rl/wrappers/ray_wrapper.py
@@ -160,6 +160,7 @@ def rllib_training(args, extras):
         checkpoint_freq=checkpoint_freq,
         checkpoint_at_end=not args.eval,
         restore=args.restore,
+        local_dir=args.experiment_dir or "logs_rllib",
         trial_name_creator=lambda trial: f"{args.experiment_name}" if args.experiment_name else f"{trial.trainable_name}_{trial.trial_id}"
     )
     if args.export:
diff --git a/godot_rl/wrappers/sample_factory_wrapper.py b/godot_rl/wrappers/sample_factory_wrapper.py
index 5674e85d..9097c405 100644
--- a/godot_rl/wrappers/sample_factory_wrapper.py
+++ b/godot_rl/wrappers/sample_factory_wrapper.py
@@ -164,6 +164,18 @@ def add_gdrl_env_args(_env, p: argparse.ArgumentParser, evaluation=False):
         type=int,
         help="Num agents in each envpool (if used)",
     )
+    p.add_argument(
+        "--experiment_dir",
+        default="logs_sf",
+        type=str,
+        help="The name of the experiment directory, in which the tensorboard logs are stored",
+    )
+    p.add_argument(
+        "--experiment_name",
+        default=None,
+        type=str,
+        help="The name of the experiment, which will be displayed in tensorboard",
+    )
 
 
 def parse_gdrl_args(argv=None, evaluation=False):
@@ -171,7 +183,8 @@ def parse_gdrl_args(argv=None, evaluation=False):
     add_gdrl_env_args(partial_cfg.env, parser, evaluation=evaluation)
     gdrl_override_defaults(partial_cfg.env, parser)
     final_cfg = parse_full_cfg(parser, argv)
-    final_cfg.experiment = final_cfg.experiment_name if final_cfg.experiment_name != None else final_cfg.experiment
+    final_cfg.train_dir = final_cfg.experiment_dir
+    final_cfg.experiment = final_cfg.experiment_name or final_cfg.experiment
     return final_cfg
diff --git a/godot_rl/wrappers/stable_baselines_wrapper.py b/godot_rl/wrappers/stable_baselines_wrapper.py
index 9165cd6a..07f1dbd2 100644
--- a/godot_rl/wrappers/stable_baselines_wrapper.py
+++ b/godot_rl/wrappers/stable_baselines_wrapper.py
@@ -138,7 +138,7 @@ def stable_baselines_training(args, extras, n_steps: int = 200000, **kwargs) ->
         ent_coef=0.0001,
         verbose=2,
         n_steps=32,
-        tensorboard_log="logs/log",
+        tensorboard_log=args.experiment_dir or "logs_sb3",
     )
 
     # Train the model

From e377033abdf9fee00fc515ebdd8e0e40c54ebe0f Mon Sep 17 00:00:00 2001
From: visuallization
Date: Sat, 15 Jul 2023 15:15:21 +0200
Subject: [PATCH 3/3] feat(logging): change default log dir + fix sample-factory version to make gdrl work out of the box

---
 README.md                                     |  2 +-
 docs/ADV_RLLIB.md                             |  2 +-
 docs/ADV_SAMPLE_FACTORY.md                    | 12 ++++++------
 docs/ADV_STABLE_BASELINES_3.md                |  2 +-
 docs/EXAMPLE_ENVIRONMENTS.md                  |  8 ++++----
 examples/clean_rl_example.py                  |  4 ++--
 examples/stable_baselines3_example.py         |  2 +-
 godot_rl/main.py                              |  3 ---
 godot_rl/wrappers/ray_wrapper.py              |  2 +-
 godot_rl/wrappers/sample_factory_wrapper.py   |  7 ++++---
 godot_rl/wrappers/stable_baselines_wrapper.py |  2 +-
 setup.cfg                                     |  8 ++++----
 12 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/README.md b/README.md
index 4547386b..56e88ef6 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@ You may need to example run permissions on the game executable. `chmod +x exampl
 3. Train and visualize
 
 ```bash
-gdrl --env=gdrl --env_path=examples/godot_rl_JumperHard/bin/JumperHard.x86_64 --viz
+gdrl --env=gdrl --env_path=examples/godot_rl_JumperHard/bin/JumperHard.x86_64 --experiment_name=Experiment_01 --viz
 ```
 
 ### In editor interactive training
diff --git a/docs/ADV_RLLIB.md b/docs/ADV_RLLIB.md
index 105a907d..9f3eba77 100644
--- a/docs/ADV_RLLIB.md
+++ b/docs/ADV_RLLIB.md
@@ -22,7 +22,7 @@ chmod +x examples/godot_rl_/bin/.x86_64 # linux example
 • Train a model from scratch:
 
 ```
-gdrl --trainer=rllib --env=gdrl --env_path=examples/godot_rl_/bin/.x86_64 --speedup=8 --viz
+gdrl --trainer=rllib --env=gdrl --env_path=examples/godot_rl_/bin/.x86_64 --speedup=8 --experiment_name=Experiment_01 --viz
 ```
 
 By default rllib will use the hyperparameters in the **ppo_test.yaml** file on the github repo. You can either modify this file, or create your own one.
diff --git a/docs/ADV_SAMPLE_FACTORY.md b/docs/ADV_SAMPLE_FACTORY.md
index eae23864..84347b70 100644
--- a/docs/ADV_SAMPLE_FACTORY.md
+++ b/docs/ADV_SAMPLE_FACTORY.md
@@ -45,7 +45,7 @@ chmod +x examples/godot_rl_/bin/.x86_64 # linux example
 • Train a model from scratch:
 
 ```bash
-gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_/bin/.x86_64 --num_workers=10 --experiment=BallChase --viz --speedup=8 --batched_sampling=True
+gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_/bin/.x86_64 --num_workers=10 --experiment_name=BallChase --viz --speedup=8 --batched_sampling=True
 ```
 
 • Download a pretrained checkpoint from the HF hub:
@@ -57,7 +57,7 @@ python -m sample_factory.huggingface.load_from_hub -r edbeeching/sample_factory_
 • Visualize a trained model:
 
 ```bash
-gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_/bin/.x86_64 --num_workers=1 --experiment= --viz --eval --batched_sampling=True --speedup=8 --push_to_hub --hf_repository=/sample_factory_
+gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_/bin/.x86_64 --num_workers=1 --experiment_name= --viz --eval --batched_sampling=True --speedup=8 --push_to_hub --hf_repository=/sample_factory_
 ```
 
 ## Advanced Environment Usage
@@ -74,7 +74,7 @@ chmod +x examples/godot_rl_Racer/bin/Racer.x86_64 # linux example
 • Train a model from scratch:
 
 ```bash
-gdrl--trainer=sf --env=gdrl --env_path=examples/godot_rl_Racer/bin/Racer.x86_64 --train_for_env_steps=10000000 --experiment=Racer --reward_scale=0.01 --worker_num_splits=2 --num_envs_per_worker=2 --num_workers=40 --speedup=8 --batched_sampling=True --batch_size=2048 --num_batches_per_epoch=2 --num_epochs=2 --learning_rate=0.0001 --exploration_loss_coef=0.0001 --lr_schedule=kl_adaptive_epoch --lr_schedule_kl_threshold=0.04 --use_rnn=True --recurrence=32
+gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_Racer/bin/Racer.x86_64 --train_for_env_steps=10000000 --experiment_name=Racer --reward_scale=0.01 --worker_num_splits=2 --num_envs_per_worker=2 --num_workers=40 --speedup=8 --batched_sampling=True --batch_size=2048 --num_batches_per_epoch=2 --num_epochs=2 --learning_rate=0.0001 --exploration_loss_coef=0.0001 --lr_schedule=kl_adaptive_epoch --lr_schedule_kl_threshold=0.04 --use_rnn=True --recurrence=32
 ```
 
 • Download a pretrained checkpoint from the HF hub:
@@ -86,7 +86,7 @@ python -m sample_factory.huggingface.load_from_hub -r edbeeching/sample_factory_
 • Visualize a trained model:
 
 ```bash
-gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_Racer/bin/Racer.x86_64 --num_workers=1 --experiment=Racer --viz --eval --batched_sampling=True --speedup=8 --push_to_hub --hf_repository=edbeeching/sample_factory_Racer
+gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_Racer/bin/Racer.x86_64 --num_workers=1 --experiment_name=Racer --viz --eval --batched_sampling=True --speedup=8 --push_to_hub --hf_repository=edbeeching/sample_factory_Racer
 ```
 
 ### Usage instructions for env **MultiAgent FPS**
@@ -101,7 +101,7 @@ chmod +x examples/godot_rl_FPS/bin/FPS.x86_64 # linux example
 • Train a model from scratch:
 
 ```bash
-gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_FPS/bin/FPS.x86_64 --num_workers=10 --experiment=FPS --viz --batched_sampling=True --speedup=8 --num_workers=80 --batched_sampling=False --num_policies=4 --with_pbt=True --pbt_period_env_steps=1000000 --pbt_start_mutation=1000000 --batch_size=2048 --num_batches_per_epoch=2 --num_epochs=2 --learning_rate=0.00005 --exploration_loss_coef=0.001 --lr_schedule=kl_adaptive_epoch --lr_schedule_kl_threshold=0.08 --use_rnn=True --recurrence=32
+gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_FPS/bin/FPS.x86_64 --num_workers=10 --experiment_name=FPS --viz --batched_sampling=True --speedup=8 --num_workers=80 --batched_sampling=False --num_policies=4 --with_pbt=True --pbt_period_env_steps=1000000 --pbt_start_mutation=1000000 --batch_size=2048 --num_batches_per_epoch=2 --num_epochs=2 --learning_rate=0.00005 --exploration_loss_coef=0.001 --lr_schedule=kl_adaptive_epoch --lr_schedule_kl_threshold=0.08 --use_rnn=True --recurrence=32
 ```
 
 • Download a pretrained checkpoint from the HF hub:
@@ -113,7 +113,7 @@ python -m sample_factory.huggingface.load_from_hub -r edbeeching/sample_factory_
 • Visualize a trained model:
 
 ```bash
-gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_FPS/bin/FPS.x86_64 --num_workers=1 --experiment=FPS --viz --eval --batched_sampling=True --speedup=8 --push_to_hub --hf_repository=edbeeching/sample_factory_FPS
+gdrl --trainer=sf --env=gdrl --env_path=examples/godot_rl_FPS/bin/FPS.x86_64 --num_workers=1 --experiment_name=FPS --viz --eval --batched_sampling=True --speedup=8 --push_to_hub --hf_repository=edbeeching/sample_factory_FPS
 ```
 
 ## Training on a cluster
diff --git a/docs/ADV_STABLE_BASELINES_3.md b/docs/ADV_STABLE_BASELINES_3.md
index 8ce36410..c3d224bb 100644
--- a/docs/ADV_STABLE_BASELINES_3.md
+++ b/docs/ADV_STABLE_BASELINES_3.md
@@ -39,7 +39,7 @@ chmod +x examples/godot_rl_/bin/.x86_64 # linux example
 ### Train a model from scratch:
 
 ```bash
-gdrl --env=gdrl --env_path=examples/godot_rl_/bin/.x86_64 --viz
+gdrl --env=gdrl --env_path=examples/godot_rl_/bin/.x86_64 --experiment_name=Experiment_01 --viz
 ```
 
 While the default options for sb3 work reasonably well. You may be interested in changing the hyperparameters.
diff --git a/docs/EXAMPLE_ENVIRONMENTS.md b/docs/EXAMPLE_ENVIRONMENTS.md
index 4dfb3255..0ca6551a 100644
--- a/docs/EXAMPLE_ENVIRONMENTS.md
+++ b/docs/EXAMPLE_ENVIRONMENTS.md
@@ -36,7 +36,7 @@ For the current version, we provide 4 example environments, located in **envs/ex
 ### Example training:
 The agent can be trained with the following command:
 ```
-gdrl --env_path envs/builds/JumperHard/jumper_hard.x86_64 --config_file envs/configs/ppo_config_jumper_hard.yaml
+gdrl --env_path envs/builds/JumperHard/jumper_hard.x86_64 --config_file envs/configs/ppo_config_jumper_hard.yaml --experiment_name=Experiment_01
 ```
 Training logs will be output by default to **/home/USER/ray_results/PPO/jumper_hard/**
 You can monitor training curves etc with tensorboard
@@ -75,7 +75,7 @@ gdrl --env_path envs/builds/JumperHard/jumper_hard.x86_64 --eval --restore envs/
 ### Example training:
 The agent can be trained with the following command:
 ```
-gdrl --env_path envs/builds/BallChase/ball_chase.x86_64 --config_file envs/configs/ppo_config_ball_chase.yaml
+gdrl --env_path envs/builds/BallChase/ball_chase.x86_64 --config_file envs/configs/ppo_config_ball_chase.yaml --experiment_name=BallChase_01
 ```
 Training logs will be output by default to **/home/USER/ray_results/PPO/ball_chase/**
 You can monitor training curves etc with tensorboard
@@ -112,7 +112,7 @@ gdrl --env_path envs/builds/BallChase/ball_chase.x86_64 --eval --restore envs/ch
 ### Example training:
 The agent can be trained with the following command:
 ```
-gdrl --env_path envs/builds/FlyBy/fly_by.x86_64 --config_file envs/configs/ppo_config_fly_by.yaml
+gdrl --env_path envs/builds/FlyBy/fly_by.x86_64 --config_file envs/configs/ppo_config_fly_by.yaml --experiment_name=FlyBy_01
 ```
 Training logs will be output by default to **/home/USER/ray_results/PPO/fly_by/**
 You can monitor training curves etc with tensorboard
@@ -153,7 +153,7 @@ gdrl --env_path envs/builds/FlyBy/fly_by.x86_64 --eval --restore envs/checkpoint
 ### Example training:
 The agent can be trained with the following command:
 ```
-gdrl --env_path envs/builds/SpaceShooter/space_shooter.x86_64 --config_file envs/configs/ppo_config_space_shooter.yaml
+gdrl --env_path envs/builds/SpaceShooter/space_shooter.x86_64 --config_file envs/configs/ppo_config_space_shooter.yaml --experiment_name=Shooter_01
 ```
 Training logs will be output by default to **/home/USER/ray_results/PPO/space_shooter/**
 You can monitor training curves etc with tensorboard
diff --git a/examples/clean_rl_example.py b/examples/clean_rl_example.py
index b2dbc530..0136c91d 100644
--- a/examples/clean_rl_example.py
+++ b/examples/clean_rl_example.py
@@ -17,7 +17,7 @@ def parse_args():
     # fmt: off
     parser = argparse.ArgumentParser()
-    parser.add_argument("--experiment_dir", default="logs_cleanrl", type=str,
+    parser.add_argument("--experiment_dir", default="logs/cleanrl", type=str,
         help="The name of the experiment directory, in which the tensorboard logs are stored")
     parser.add_argument("--experiment_name", default=os.path.basename(__file__).rstrip(".py"), type=str,
         help="The name of the experiment, which will be displayed in tensorboard")
@@ -126,7 +126,7 @@ def get_action_and_value(self, x, action=None):
 
 if __name__ == "__main__":
     args = parse_args()
-    run_name = f"{args.env_path}__{args.experiment_name}__{args.seed}__{int(time.time())}"
+    run_name = f"{args.experiment_name}__{args.seed}__{int(time.time())}"
     if args.track:
         import wandb
 
diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py
index 342f4d83..92b043e1 100644
--- a/examples/stable_baselines3_example.py
+++ b/examples/stable_baselines3_example.py
@@ -19,7 +19,7 @@
 )
 parser.add_argument(
     "--experiment_dir",
-    default="logs_sb3",
+    default="logs/sb3",
     type=str,
     help="The name of the experiment directory, in which the tensorboard logs are stored",
 )
diff --git a/godot_rl/main.py b/godot_rl/main.py
index 312c29b6..329049a7 100644
--- a/godot_rl/main.py
+++ b/godot_rl/main.py
@@ -25,7 +25,6 @@
 try:
     from godot_rl.wrappers.ray_wrapper import rllib_training
 except ImportError as e:
-    print("Warning: ", e)
 
     def rllib_training(args, extras):
         print("Import error when trying to use rllib. If you have not installed the package, try: pip install godot-rl[rllib]")
         print("Otherwise try fixing the error above.")
@@ -34,7 +33,6 @@ def rllib_training(args, extras):
 try:
     from godot_rl.wrappers.stable_baselines_wrapper import stable_baselines_training
 except ImportError as e:
-    print("Warning: ", e)
 
     def stable_baselines_training(args, extras):
         print(
             "Import error when trying to use sb3. If you have not installed the package, try: pip install godot-rl[sb3]"
         )
@@ -44,7 +42,6 @@ def stable_baselines_training(args, extras):
 try:
     from godot_rl.wrappers.sample_factory_wrapper import sample_factory_training, sample_factory_enjoy
 except ImportError as e:
-    print("Warning: ", e)
 
     def sample_factory_training(args, extras):
         print(
             "Import error when trying to use sample-factory If you have not installed the package, try: pip install godot-rl[sf]"
         )
diff --git a/godot_rl/wrappers/ray_wrapper.py b/godot_rl/wrappers/ray_wrapper.py
index 6d9fbf50..1eb5b135 100644
--- a/godot_rl/wrappers/ray_wrapper.py
+++ b/godot_rl/wrappers/ray_wrapper.py
@@ -160,7 +160,7 @@ def rllib_training(args, extras):
         checkpoint_freq=checkpoint_freq,
         checkpoint_at_end=not args.eval,
         restore=args.restore,
-        local_dir=args.experiment_dir or "logs_rllib",
+        local_dir=args.experiment_dir or "logs/rllib",
         trial_name_creator=lambda trial: f"{args.experiment_name}" if args.experiment_name else f"{trial.trainable_name}_{trial.trial_id}"
     )
     if args.export:
diff --git a/godot_rl/wrappers/sample_factory_wrapper.py b/godot_rl/wrappers/sample_factory_wrapper.py
index 9097c405..e81cccaf 100644
--- a/godot_rl/wrappers/sample_factory_wrapper.py
+++ b/godot_rl/wrappers/sample_factory_wrapper.py
@@ -166,7 +166,7 @@ def add_gdrl_env_args(_env, p: argparse.ArgumentParser, evaluation=False):
     )
     p.add_argument(
         "--experiment_dir",
-        default="logs_sf",
+        default="logs/sf",
         type=str,
         help="The name of the experiment directory, in which the tensorboard logs are stored",
     )
@@ -183,8 +183,9 @@ def parse_gdrl_args(argv=None, evaluation=False):
     add_gdrl_env_args(partial_cfg.env, parser, evaluation=evaluation)
     gdrl_override_defaults(partial_cfg.env, parser)
     final_cfg = parse_full_cfg(parser, argv)
-    final_cfg.train_dir = final_cfg.experiment_dir
-    final_cfg.experiment = final_cfg.experiment_name or final_cfg.experiment
+    args, _ = parser.parse_known_args()
+    final_cfg.train_dir = args.experiment_dir or "logs/sf"
+    final_cfg.experiment = args.experiment_name or final_cfg.experiment
     return final_cfg
diff --git a/godot_rl/wrappers/stable_baselines_wrapper.py b/godot_rl/wrappers/stable_baselines_wrapper.py
index 07f1dbd2..2134c6bc 100644
--- a/godot_rl/wrappers/stable_baselines_wrapper.py
+++ b/godot_rl/wrappers/stable_baselines_wrapper.py
@@ -138,7 +138,7 @@ def stable_baselines_training(args, extras, n_steps: int = 200000, **kwargs) ->
         ent_coef=0.0001,
         verbose=2,
         n_steps=32,
-        tensorboard_log=args.experiment_dir or "logs_sb3",
+        tensorboard_log=args.experiment_dir or "logs/sb3",
     )
 
     # Train the model
diff --git a/setup.cfg b/setup.cfg
index 482f08be..e3c54c2b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -50,7 +50,7 @@ sb3 =
     huggingface_sb3
 
 sf =
-    sample-factory
+    sample-factory==2.0.3
     gym==0.26.2
 
 rllib =
@@ -66,9 +66,9 @@ all =
    numpy==1.23.5
    gym==0.26.2
    stable-baselines3==1.2.0
-    huggingface_sb3
-    sample-factory
-
+    sample-factory==2.0.3
    ray==2.2.0
    ray[rllib]
+
+    huggingface_sb3
    tensorflow_probability
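
For reference, a minimal sketch of how the flags introduced in this series could be combined when launching the sb3 example script; the environment binary path and the experiment name below are placeholders, not values fixed by this patch:

```bash
# Hypothetical invocation of examples/stable_baselines3_example.py with the new logging flags.
# JumperHard is only used as an example binary; any exported Godot RL environment works.
python examples/stable_baselines3_example.py \
    --env_path=examples/godot_rl_JumperHard/bin/JumperHard.x86_64 \
    --experiment_dir=logs/sb3 \
    --experiment_name=Experiment_01 \
    --speedup=8 \
    --n_parallel=2

# SB3 writes the TensorBoard event files for the run under --experiment_dir,
# so training curves can then be inspected with:
tensorboard --logdir logs/sb3
```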