From 2a335c39a69d60ce1b675535172537e50af3b74e Mon Sep 17 00:00:00 2001
From: edbeeching <edbeeching@gmail.com>
Date: Thu, 27 Jul 2023 10:40:49 +0200
Subject: [PATCH 01/32] fixes info shape on reset

---
 godot_rl/core/godot_env.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/godot_rl/core/godot_env.py b/godot_rl/core/godot_env.py
index 6fed3637..1c888215 100644
--- a/godot_rl/core/godot_env.py
+++ b/godot_rl/core/godot_env.py
@@ -225,7 +225,7 @@ def reset(self, seed=None):
         response["obs"] = self._process_obs(response["obs"])
         assert response["type"] == "reset"
         obs = response["obs"]
-        return obs, {}
+        return obs, [{}] * self.num_envs
 
     def call(self, method):
         message = {

From b78b1bed1fcca64567ad4b1864e3777735a779f9 Mon Sep 17 00:00:00 2001
From: edbeeching <edbeeching@gmail.com>
Date: Thu, 27 Jul 2023 10:41:15 +0200
Subject: [PATCH 02/32] updates RLLIB wrapper to support latest version

---
 godot_rl/wrappers/ray_wrapper.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/godot_rl/wrappers/ray_wrapper.py b/godot_rl/wrappers/ray_wrapper.py
index 1eb5b135..9357214c 100644
--- a/godot_rl/wrappers/ray_wrapper.py
+++ b/godot_rl/wrappers/ray_wrapper.py
@@ -20,6 +20,7 @@ def __init__(
         show_window=False,
         framerate=None,
         action_repeat=None,
+        speedup=None,
         timeout_wait=60,
         config=None,
     ) -> None:
@@ -31,6 +32,7 @@ def __init__(
             show_window=show_window,
             framerate=framerate,
             action_repeat=action_repeat,
+            speedup=speedup
         )
         super().__init__(
             observation_space=self._env.observation_space,
@@ -38,23 +40,28 @@ def __init__(
             num_envs=self._env.num_envs,
         )
 
-    def vector_reset(self) -> List[EnvObsType]:
-        obs, info = self._env.reset()
-        return obs
+    def vector_reset(self, *, seeds: Optional[List[int]] = None, options: Optional[List[dict]] = None) -> List[EnvObsType]:
+        self.obs, info = self._env.reset()
+        return self.obs, info 
 
     def vector_step(
         self, actions: List[EnvActionType]
     ) -> Tuple[List[EnvObsType], List[float], List[bool], List[EnvInfoDict]]:
         actions = np.array(actions)
         self.obs, reward, term, trunc, info = self._env.step(actions, order_ij=True)
-        return self.obs, reward, term, info
+        return self.obs, reward, term, trunc, info
 
     def get_unwrapped(self):
         return [self._env]
 
-    def reset_at(self, index: Optional[int]) -> EnvObsType:
+    def reset_at(self,     
+            index: Optional[int] = None,
+            *,
+            seed: Optional[int] = None,
+            options: Optional[dict] = None,
+        ) -> EnvObsType:
         # the env is reset automatically, no need to reset it
-        return self.obs[index]
+        return self.obs[index], {}
 
 
 def register_env():
@@ -68,6 +75,7 @@ def register_env():
             framerate=c["framerate"],
             seed=c.worker_index + c["seed"],
             action_repeat=c["framerate"],
+            speedup=c["speedup"],
         ),
     )
 
@@ -133,6 +141,10 @@ def rllib_training(args, extras):
 
     checkpoint_freq = 10
     checkpoint_at_end = True
+    
+    exp["config"]["env_config"]["show_window"] = args.viz
+    exp["config"]["env_config"]["speedup"] = args.speedup
+    
     if args.eval or args.export:
         checkpoint_freq = 0
         exp["config"]["env_config"]["show_window"] = True

From cc723760ce5457ddddd5baed90b1eca55af76c5e Mon Sep 17 00:00:00 2001
From: edbeeching <edbeeching@gmail.com>
Date: Thu, 27 Jul 2023 10:41:52 +0200
Subject: [PATCH 03/32] update setup and toml file for a minor release

---
 pyproject.toml |  2 +-
 setup.cfg      | 10 +---------
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index f2fe56de..f2ce4faf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
 [project]
 name = "godot_rl"
-version = "0.6.0"
+version = "0.6.1"
 authors = [
   { name="Edward Beeching", email="edbeeching@gmail.com" },
 ]
diff --git a/setup.cfg b/setup.cfg
index ddba42b9..66549f6b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -48,17 +48,9 @@ sf =
     sample-factory
 
 rllib = 
-    numpy==1.23.5
-    ray==2.2.0
+    gymnasium==0.26.3
     ray[rllib]
-    tensorflow_probability
 
 clean-rl = 
     wandb
 
-all =     
-    numpy==1.23.5
-    sample-factory
-    ray==2.2.0
-    ray[rllib]
-    tensorflow_probability

From 6a735d11429810c4f23eb56e13f3684dbb81200f Mon Sep 17 00:00:00 2001
From: edbeeching <edbeeching@gmail.com>
Date: Thu, 27 Jul 2023 12:59:03 +0200
Subject: [PATCH 04/32] updates docs

---
 docs/ADV_CLEAN_RL.md | 6 +++---
 docs/ADV_RLLIB.md    | 2 ++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/docs/ADV_CLEAN_RL.md b/docs/ADV_CLEAN_RL.md
index 04861b13..a622790f 100644
--- a/docs/ADV_CLEAN_RL.md
+++ b/docs/ADV_CLEAN_RL.md
@@ -17,11 +17,11 @@ You can read more about CleanRL in their [technical paper](https://arxiv.org/abs
 
 # Installation
 ```bash
-pip install godot-rl[clean-rl]
+pip install godot-rl[cleanrl]
 ```
 
-While the default options for clean-rl work reasonably well. You may be interested in changing the hyperparameters.
-We recommend taking the [clean-rl example](https://github.com/edbeeching/godot_rl_agents/blob/main/examples/clean_rl_example.py) and modifying to match your needs.
+While the default options for cleanrl work reasonably well, you may be interested in changing the hyperparameters.
+We recommend taking the [cleanrl example](https://github.com/edbeeching/godot_rl_agents/blob/main/examples/clean_rl_example.py) and modifying it to match your needs.
 
 ```python
     parser.add_argument("--gae-lambda", type=float, default=0.95,
diff --git a/docs/ADV_RLLIB.md b/docs/ADV_RLLIB.md
index 9f3eba77..9a8cd388 100644
--- a/docs/ADV_RLLIB.md
+++ b/docs/ADV_RLLIB.md
@@ -4,6 +4,8 @@
 
 ## Installation
 
+If you want to train with rllib, create a new virtual environment (e.g. `python -m venv venv.rllib`), as rllib's dependencies can conflict with those of sb3 and other libraries.
+
 ```bash
 # remove sb3 installation with pip uninstall godot-rl[sb3]
 pip install godot-rl[rllib]

From 326b188dcae3e74625592141be30c7a61d805d61 Mon Sep 17 00:00:00 2001
From: visuallization <florentin.rieger@gmail.com>
Date: Thu, 27 Jul 2023 13:35:19 +0200
Subject: [PATCH 05/32] chore(addon): use addon submodule from main branch

---
 .gitignore             | 1 +
 godot_rl_agents_plugin | 2 +-
 setup.cfg              | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index d22416ac..f5f13110 100644
--- a/.gitignore
+++ b/.gitignore
@@ -136,6 +136,7 @@ dmypy.json
 
 envs/unity/
 logs/
+logs.*/
 dump/
 tmp/
 Packaging Python Projects — Python Packaging User Guide_files/
diff --git a/godot_rl_agents_plugin b/godot_rl_agents_plugin
index 3984fd12..5b09dc90 160000
--- a/godot_rl_agents_plugin
+++ b/godot_rl_agents_plugin
@@ -1 +1 @@
-Subproject commit 3984fd124a2b941a446f4614bb0eacd09a2468f5
+Subproject commit 5b09dc906eae1e037c4f8b0b09a1ffe11340802f
diff --git a/setup.cfg b/setup.cfg
index 66549f6b..a6dc219f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -51,6 +51,6 @@ rllib =
     gymnasium==0.26.3
     ray[rllib]
 
-clean-rl = 
+cleanrl = 
     wandb
 

From 20c4df7ed73e04eeb04c0e4d16d00091dad7818d Mon Sep 17 00:00:00 2001
From: visuallization <florentin.rieger@gmail.com>
Date: Thu, 27 Jul 2023 14:14:27 +0200
Subject: [PATCH 06/32] fix(ray): make mixed action types work with latest
 numpy version

---
 godot_rl/wrappers/ray_wrapper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/godot_rl/wrappers/ray_wrapper.py b/godot_rl/wrappers/ray_wrapper.py
index 9357214c..9c4caffe 100644
--- a/godot_rl/wrappers/ray_wrapper.py
+++ b/godot_rl/wrappers/ray_wrapper.py
@@ -47,7 +47,7 @@ def vector_reset(self, *, seeds: Optional[List[int]] = None, options: Optional[L
     def vector_step(
         self, actions: List[EnvActionType]
     ) -> Tuple[List[EnvObsType], List[float], List[bool], List[EnvInfoDict]]:
-        actions = np.array(actions)
+        actions = np.array(actions, dtype=np.dtype(object))
         self.obs, reward, term, trunc, info = self._env.step(actions, order_ij=True)
         return self.obs, reward, term, trunc, info
 

From 1d4baa54e5e5435fd8a97167c12c48dcbad05ea9 Mon Sep 17 00:00:00 2001
From: visuallization <florentin.rieger@gmail.com>
Date: Thu, 27 Jul 2023 16:27:30 +0200
Subject: [PATCH 07/32] fix(cleanrl): make cleanrl work again

---
 examples/clean_rl_example.py          | 2 --
 godot_rl/wrappers/clean_rl_wrapper.py | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/examples/clean_rl_example.py b/examples/clean_rl_example.py
index 09b6f572..82595538 100644
--- a/examples/clean_rl_example.py
+++ b/examples/clean_rl_example.py
@@ -5,7 +5,6 @@
 import time
 from distutils.util import strtobool
 from collections import deque
-import gym
 import numpy as np
 import torch
 import torch.nn as nn
@@ -156,7 +155,6 @@ def get_action_and_value(self, x, action=None):
     # env setup
     
     envs = env = CleanRLGodotEnv(env_path=args.env_path, show_window=True, speedup=args.speedup, convert_action_space=True) # Godot envs are already vectorized
-    #assert isinstance(envs.single_action_space, gym.spaces.Box), "only continuous action space is supported"
     args.num_envs = envs.num_envs
     args.batch_size = int(args.num_envs * args.num_steps)
     args.minibatch_size = int(args.batch_size // args.num_minibatches)
diff --git a/godot_rl/wrappers/clean_rl_wrapper.py b/godot_rl/wrappers/clean_rl_wrapper.py
index 9f42f874..bd73d4e2 100644
--- a/godot_rl/wrappers/clean_rl_wrapper.py
+++ b/godot_rl/wrappers/clean_rl_wrapper.py
@@ -1,6 +1,6 @@
 
 import numpy as np
-import gym
+import gymnasium as gym
 from godot_rl.core.utils import lod_to_dol
 from godot_rl.core.godot_env import GodotEnv
 

From 1a1b18f069e39c364089f8e00b4efe97c9524814 Mon Sep 17 00:00:00 2001
From: visuallization <florentin.rieger@gmail.com>
Date: Thu, 27 Jul 2023 16:30:29 +0200
Subject: [PATCH 08/32] fix(hp tuning): use gymnasium instead of gym

---
 examples/stable_baselines3_hp_tuning.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/stable_baselines3_hp_tuning.py b/examples/stable_baselines3_hp_tuning.py
index 618f5c88..7e280f8c 100644
--- a/examples/stable_baselines3_hp_tuning.py
+++ b/examples/stable_baselines3_hp_tuning.py
@@ -23,7 +23,7 @@
 from typing import Any
 from typing import Dict
 
-import gym
+import gymnasium as gym
 
 from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv
 from godot_rl.core.godot_env import GodotEnv

From ac1c4bbdaf551fb950a9ef20a71e74a8a527039d Mon Sep 17 00:00:00 2001
From: edbeeching <edbeeching@gmail.com>
Date: Thu, 27 Jul 2023 17:18:12 +0200
Subject: [PATCH 09/32] adds tests for rllib

---
 .github/workflows/test-ci.yml                 | 50 +++++++++++++++++++
 examples/stable_baselines3_example.py         |  4 +-
 godot_rl/wrappers/stable_baselines_wrapper.py |  4 +-
 tests/fixtures/test_rllib.yaml                | 39 +++++++++++++++
 tests/test_rllib.py                           | 14 ++++++
 tests/test_sample_factory.py                  |  3 +-
 tests/test_sb3_onnx_export.py                 | 11 ++--
 tests/test_sb3_training.py                    | 18 ++-----
 8 files changed, 121 insertions(+), 22 deletions(-)
 create mode 100644 tests/fixtures/test_rllib.yaml
 create mode 100644 tests/test_rllib.py

diff --git a/.github/workflows/test-ci.yml b/.github/workflows/test-ci.yml
index 5bc6933b..5c9c59ef 100644
--- a/.github/workflows/test-ci.yml
+++ b/.github/workflows/test-ci.yml
@@ -55,3 +55,53 @@ jobs:
     - name: Test with pytest
       run: |
         make test
+
+
+  tests_ubuntu_rllib:
+    strategy:
+      matrix:
+        python-version: [3.8, 3.9, 3.10.10]
+        os: ['ubuntu-latest']
+    runs-on: ${{ matrix.os }}
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v4
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        # cpu version of pytorch
+        pip install .[test,rllib]
+    - name: Download examples
+      run: |
+        make download_examples
+
+    - name: Test with pytest
+      run: |
+        make test
+  tests_windows_rllib:
+    strategy:
+      matrix:
+        python-version: [3.8, 3.9, 3.10.10]
+        os: ['windows-latest']
+    runs-on: ${{ matrix.os }}
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v4
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        # cpu version of pytorch
+        pip install .[test,rllib]
+    - name: Download examples
+      run: |
+        make download_examples
+
+    - name: Test with pytest
+      run: |
+        make test
diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py
index fd9c22fc..9ea90c66 100644
--- a/examples/stable_baselines3_example.py
+++ b/examples/stable_baselines3_example.py
@@ -3,6 +3,7 @@
 import pathlib
 
 from stable_baselines3.common.callbacks import CheckpointCallback
+from godot_rl.core.utils import can_import
 from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv
 from godot_rl.wrappers.onnx.stable_baselines_export import export_ppo_model_as_onnx
 from stable_baselines3 import PPO
@@ -11,7 +12,8 @@
 # To download the env source and binary:
 # 1.  gdrl.env_from_hub -r edbeeching/godot_rl_BallChase
 # 2.  chmod +x examples/godot_rl_BallChase/bin/BallChase.x86_64
-
+if can_import("ray"):
+    print("WARNING, stable baselines and ray[rllib] are not compatable")
 
 parser = argparse.ArgumentParser(allow_abbrev=False)
 parser.add_argument(
diff --git a/godot_rl/wrappers/stable_baselines_wrapper.py b/godot_rl/wrappers/stable_baselines_wrapper.py
index 03dad780..2462b78e 100644
--- a/godot_rl/wrappers/stable_baselines_wrapper.py
+++ b/godot_rl/wrappers/stable_baselines_wrapper.py
@@ -6,7 +6,7 @@
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 from godot_rl.core.godot_env import GodotEnv
-from godot_rl.core.utils import lod_to_dol
+from godot_rl.core.utils import can_import, lod_to_dol
 
 
 class StableBaselinesGodotEnv(VecEnv):
@@ -129,6 +129,8 @@ def step_wait(self) -> Tuple[Dict[str, np.ndarray], np.ndarray, np.ndarray, List
         return self.results
 
 def stable_baselines_training(args, extras, n_steps: int = 200000, **kwargs) -> None:
+    if can_import("ray"):
+        print("WARNING, stable baselines and ray[rllib] are not compatable")
     # Initialize the custom environment
     env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=args.viz, speedup=args.speedup, **kwargs)
     env = VecMonitor(env)
diff --git a/tests/fixtures/test_rllib.yaml b/tests/fixtures/test_rllib.yaml
new file mode 100644
index 00000000..955c6449
--- /dev/null
+++ b/tests/fixtures/test_rllib.yaml
@@ -0,0 +1,39 @@
+
+algorithm: PPO
+
+stop:
+    episode_reward_mean: 5000
+    training_iteration: 1000
+    timesteps_total: 200
+
+config:
+    env: godot
+    env_config:
+        framerate: null
+        action_repeat: null
+        show_window: false
+        seed: 0
+    framework: torch  
+    lambda: 0.95
+    gamma: 0.95
+
+    vf_clip_param: 1.0
+    clip_param: 0.2
+    entropy_coeff: 0.001
+    entropy_coeff_schedule: null
+    train_batch_size: 1024
+    sgd_minibatch_size: 128
+    num_sgd_iter: 16
+    num_workers: 4
+    lr: 0.0003
+    num_envs_per_worker: 16
+    batch_mode: truncate_episodes
+    rollout_fragment_length: 16
+    num_gpus: 1
+    model:
+        fcnet_hiddens: [256, 256] 
+        use_lstm: false
+        lstm_cell_size : 32
+        framestack: 4
+    no_done_at_end: false
+    soft_horizon: false
diff --git a/tests/test_rllib.py b/tests/test_rllib.py
new file mode 100644
index 00000000..d6eb62f5
--- /dev/null
+++ b/tests/test_rllib.py
@@ -0,0 +1,14 @@
+import pytest
+
+from godot_rl.core.utils import cant_import
+
+@pytest.mark.skipif(cant_import("ray"), reason="ray[rllib] is not available")
+def test_rllib_training():
+    from godot_rl.wrappers.ray_wrapper import rllib_training
+    from godot_rl.main import get_args
+    args, extras = get_args()
+    args.config_file = "tests/fixtures/test_rllib.yaml"
+    args.env_path = "examples/godot_rl_JumperHard/bin/JumperHard.x86_64"
+
+    
+    rllib_training(args, extras)
\ No newline at end of file
diff --git a/tests/test_sample_factory.py b/tests/test_sample_factory.py
index 7f6b686d..eaa9c826 100644
--- a/tests/test_sample_factory.py
+++ b/tests/test_sample_factory.py
@@ -13,4 +13,5 @@ def test_sample_factory_training():
     extras.append('--train_for_env_steps=1000')
     extras.append('--device=cpu')
     
-    sample_factory_training(args, extras)
\ No newline at end of file
+    sample_factory_training(args, extras)
+    
diff --git a/tests/test_sb3_onnx_export.py b/tests/test_sb3_onnx_export.py
index b910cfce..4dc1c4eb 100644
--- a/tests/test_sb3_onnx_export.py
+++ b/tests/test_sb3_onnx_export.py
@@ -1,13 +1,10 @@
 import os
 
 import pytest
-from stable_baselines3 import PPO
-
-from godot_rl.wrappers.onnx.stable_baselines_export import (
-    export_ppo_model_as_onnx, verify_onnx_export)
-from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv
 
+from godot_rl.core.utils import can_import
 
+@pytest.mark.skipif(can_import("ray"), reason="rllib and sb3 are not compatable")
 @pytest.mark.parametrize(
     "env_name,port",
     [
@@ -19,6 +16,10 @@
     ],
 )
 def test_pytorch_vs_onnx(env_name, port):
+    from stable_baselines3 import PPO
+    from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv
+    from godot_rl.wrappers.onnx.stable_baselines_export import export_ppo_model_as_onnx, verify_onnx_export
+    
     env_path = f"examples/godot_rl_{env_name}/bin/{env_name}.x86_64"
     env = StableBaselinesGodotEnv(env_path, port=port)
 
diff --git a/tests/test_sb3_training.py b/tests/test_sb3_training.py
index 864ed482..01071732 100644
--- a/tests/test_sb3_training.py
+++ b/tests/test_sb3_training.py
@@ -1,15 +1,9 @@
 import pytest
 
-from godot_rl.core.godot_env import GodotEnv
 from godot_rl.main import get_args
+from godot_rl.core.utils import can_import
 
-try:
-    from godot_rl.wrappers.stable_baselines_wrapper import stable_baselines_training
-except ImportError as e:
-
-    def stable_baselines_training(args, extras, **kwargs):
-        print("Import error when trying to use sb3, this is probably not installed try pip install godot-rl[sb3]")
-
+@pytest.mark.skipif(can_import("ray"), reason="rllib and sb3 are not compatable")
 @pytest.mark.parametrize(
     "env_name,port",
     [
@@ -20,13 +14,9 @@ def stable_baselines_training(args, extras, **kwargs):
         ("FlyBy", 12400),
     ],
 )
-@pytest.mark.parametrize(
-    "n_parallel",[
-        1,2,4
-    ]
-    
-)
+@pytest.mark.parametrize("n_parallel",[1,2,4])
 def test_sb3_training(env_name, port, n_parallel):
+    from godot_rl.wrappers.stable_baselines_wrapper import stable_baselines_training
     args, extras = get_args()
     args.env = "gdrl"
     args.env_path = f"examples/godot_rl_{env_name}/bin/{env_name}.x86_64"

From 0f1d1b1990daba91f9215758df642f5a85275b9e Mon Sep 17 00:00:00 2001
From: Edward Beeching <edbeeching@users.noreply.github.com>
Date: Thu, 27 Jul 2023 21:29:06 +0200
Subject: [PATCH 10/32] Update test-ci.yml

---
 .github/workflows/test-ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test-ci.yml b/.github/workflows/test-ci.yml
index 5c9c59ef..b0fe88a7 100644
--- a/.github/workflows/test-ci.yml
+++ b/.github/workflows/test-ci.yml
@@ -73,7 +73,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         # cpu version of pytorch
-        pip install .[test,rllib]
+        pip install .[rllib,test]
     - name: Download examples
       run: |
         make download_examples
@@ -97,7 +97,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         # cpu version of pytorch
-        pip install .[test,rllib]
+        pip install .[rllib,test]
     - name: Download examples
       run: |
         make download_examples

From a38e4b9be9de2e3810dfd5dca154d5577e745ff8 Mon Sep 17 00:00:00 2001
From: Edward Beeching <edbeeching@users.noreply.github.com>
Date: Thu, 27 Jul 2023 22:26:49 +0200
Subject: [PATCH 11/32] hacky fix to get rllib test to work

---
 .github/workflows/test-ci.yml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test-ci.yml b/.github/workflows/test-ci.yml
index b0fe88a7..c59f3a61 100644
--- a/.github/workflows/test-ci.yml
+++ b/.github/workflows/test-ci.yml
@@ -73,7 +73,9 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         # cpu version of pytorch
-        pip install .[rllib,test]
+        pip install .[test]
+        pip uninstall stable-baselines3 gymnasium
+        pip install ray[rllib]
     - name: Download examples
       run: |
         make download_examples
@@ -97,7 +99,9 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         # cpu version of pytorch
-        pip install .[rllib,test]
+        pip install .[test]
+        pip uninstall stable-baselines3 gymnasium
+        pip install ray[rllib]
     - name: Download examples
       run: |
         make download_examples

From c67b68bb177957f7225a4e58690e3d44267dc5a5 Mon Sep 17 00:00:00 2001
From: Edward Beeching <edbeeching@users.noreply.github.com>
Date: Thu, 27 Jul 2023 22:35:08 +0200
Subject: [PATCH 12/32] last try before I give up

---
 .github/workflows/test-ci.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/test-ci.yml b/.github/workflows/test-ci.yml
index c59f3a61..45a0c680 100644
--- a/.github/workflows/test-ci.yml
+++ b/.github/workflows/test-ci.yml
@@ -74,8 +74,8 @@ jobs:
         python -m pip install --upgrade pip
         # cpu version of pytorch
         pip install .[test]
-        pip uninstall stable-baselines3 gymnasium
-        pip install ray[rllib]
+        pip uninstall -y stable-baselines3 gymnasium
+        pip install -y ray[rllib]
     - name: Download examples
       run: |
         make download_examples
@@ -100,8 +100,8 @@ jobs:
         python -m pip install --upgrade pip
         # cpu version of pytorch
         pip install .[test]
-        pip uninstall stable-baselines3 gymnasium
-        pip install ray[rllib]
+        pip uninstall -y stable-baselines3 gymnasium
+        pip install -y ray[rllib]
     - name: Download examples
       run: |
         make download_examples

From fd7d277f04503566061cbcd07c736d37f7899441 Mon Sep 17 00:00:00 2001
From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com>
Date: Thu, 27 Jul 2023 23:27:02 +0200
Subject: [PATCH 13/32] Update .github/workflows/test-ci.yml

---
 .github/workflows/test-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-ci.yml b/.github/workflows/test-ci.yml
index 45a0c680..b47c6db3 100644
--- a/.github/workflows/test-ci.yml
+++ b/.github/workflows/test-ci.yml
@@ -75,7 +75,7 @@ jobs:
         # cpu version of pytorch
         pip install .[test]
         pip uninstall -y stable-baselines3 gymnasium
-        pip install -y ray[rllib]
+        pip install ray[rllib]
     - name: Download examples
       run: |
         make download_examples

From d56cf4d9935a38e4d0e776c358a5ec8d47d63dbb Mon Sep 17 00:00:00 2001
From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com>
Date: Thu, 27 Jul 2023 23:27:11 +0200
Subject: [PATCH 14/32] Update .github/workflows/test-ci.yml

---
 .github/workflows/test-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-ci.yml b/.github/workflows/test-ci.yml
index b47c6db3..99880618 100644
--- a/.github/workflows/test-ci.yml
+++ b/.github/workflows/test-ci.yml
@@ -101,7 +101,7 @@ jobs:
         # cpu version of pytorch
         pip install .[test]
         pip uninstall -y stable-baselines3 gymnasium
-        pip install -y ray[rllib]
+        pip install ray[rllib]
     - name: Download examples
       run: |
         make download_examples

From 9a5e409abe30b83d3e200bfae84233b5d6e04aad Mon Sep 17 00:00:00 2001
From: visuallization <florentin.rieger@gmail.com>
Date: Fri, 28 Jul 2023 10:56:25 +0200
Subject: [PATCH 15/32] fix(test): split dependency install and cleanup into
 two steps and see if it resolves the issue

---
 .github/workflows/test-ci.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/test-ci.yml b/.github/workflows/test-ci.yml
index 99880618..17d62741 100644
--- a/.github/workflows/test-ci.yml
+++ b/.github/workflows/test-ci.yml
@@ -74,6 +74,8 @@ jobs:
         python -m pip install --upgrade pip
         # cpu version of pytorch
         pip install .[test]
+    - name: Clean up dependencies
+      run: |
         pip uninstall -y stable-baselines3 gymnasium
         pip install ray[rllib]
     - name: Download examples

From d1d2e80f2e32f99ad5d8960765ad46a2dea971a9 Mon Sep 17 00:00:00 2001
From: visuallization <florentin.rieger@gmail.com>
Date: Fri, 28 Jul 2023 11:03:29 +0200
Subject: [PATCH 16/32] fix(test): install rllib from setup.cfg

---
 .github/workflows/test-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-ci.yml b/.github/workflows/test-ci.yml
index 17d62741..56878dfc 100644
--- a/.github/workflows/test-ci.yml
+++ b/.github/workflows/test-ci.yml
@@ -77,7 +77,7 @@ jobs:
     - name: Clean up dependencies
       run: |
         pip uninstall -y stable-baselines3 gymnasium
-        pip install ray[rllib]
+        pip install .[rllib]
     - name: Download examples
       run: |
         make download_examples

From 6bc953928bc505d7321a9b85c625a3dfbe60fd39 Mon Sep 17 00:00:00 2001
From: visuallization <florentin.rieger@gmail.com>
Date: Fri, 28 Jul 2023 11:30:52 +0200
Subject: [PATCH 17/32] feat(cleanrl): added --viz option to example

---
 examples/clean_rl_example.py          | 5 ++++-
 examples/stable_baselines3_example.py | 4 ++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/examples/clean_rl_example.py b/examples/clean_rl_example.py
index 82595538..1f123b65 100644
--- a/examples/clean_rl_example.py
+++ b/examples/clean_rl_example.py
@@ -16,6 +16,9 @@
 def parse_args():
     # fmt: off
     parser = argparse.ArgumentParser()
+    parser.add_argument("--viz", default=False, type=bool,
+        help="If set, the simulation will be displayed in a window during training. Otherwise "
+            "training will run without rendering the simualtion. This setting does not apply to in-editor training.")
     parser.add_argument("--experiment_dir", default="logs/cleanrl", type=str,
         help="The name of the experiment directory, in which the tensorboard logs are getting stored")
     parser.add_argument("--experiment_name", default=os.path.basename(__file__).rstrip(".py"), type=str,
@@ -154,7 +157,7 @@ def get_action_and_value(self, x, action=None):
 
     # env setup
     
-    envs = env = CleanRLGodotEnv(env_path=args.env_path, show_window=True, speedup=args.speedup, convert_action_space=True) # Godot envs are already vectorized
+    envs = env = CleanRLGodotEnv(env_path=args.env_path, show_window=args.viz, speedup=args.speedup, convert_action_space=True) # Godot envs are already vectorized
     args.num_envs = envs.num_envs
     args.batch_size = int(args.num_envs * args.num_steps)
     args.minibatch_size = int(args.batch_size // args.num_minibatches)
diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py
index 9ea90c66..c25de664 100644
--- a/examples/stable_baselines3_example.py
+++ b/examples/stable_baselines3_example.py
@@ -82,8 +82,8 @@
 parser.add_argument(
     "--viz",
     action="store_true",
-    help="If set, the window(s) with the Godot environment(s) will be displayed, otherwise "
-         "training will run without rendering the game. Does not apply to in-editor training.",
+    help="If set, the simulation will be displayed in a window during training. Otherwise "
+        "training will run without rendering the simualtion. This setting does not apply to in-editor training.",
     default=False
 )
 parser.add_argument("--speedup", default=1, type=int, help="Whether to speed up the physics in the env")

From 696f8722c6cf4a2a8a5c25720da180213711df58 Mon Sep 17 00:00:00 2001
From: visuallization <florentin.rieger@gmail.com>
Date: Fri, 28 Jul 2023 11:36:07 +0200
Subject: [PATCH 18/32] fix(test): also update pip in cleanup step

---
 .github/workflows/test-ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/test-ci.yml b/.github/workflows/test-ci.yml
index 56878dfc..01299cdc 100644
--- a/.github/workflows/test-ci.yml
+++ b/.github/workflows/test-ci.yml
@@ -76,6 +76,7 @@ jobs:
         pip install .[test]
     - name: Clean up dependencies
       run: |
+        python -m pip install --upgrade pip
         pip uninstall -y stable-baselines3 gymnasium
         pip install .[rllib]
     - name: Download examples

From 888dec737e85c4edc88427d9259e51be0484282b Mon Sep 17 00:00:00 2001
From: visuallization <florentin.rieger@gmail.com>
Date: Fri, 28 Jul 2023 11:43:48 +0200
Subject: [PATCH 19/32] fix(test): set wheel version

---
 .github/workflows/test-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-ci.yml b/.github/workflows/test-ci.yml
index 01299cdc..a979d695 100644
--- a/.github/workflows/test-ci.yml
+++ b/.github/workflows/test-ci.yml
@@ -71,7 +71,7 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip
+        python -m pip install --upgrade pip wheel==0.38.4
         # cpu version of pytorch
         pip install .[test]
     - name: Clean up dependencies

From 6468858343c9ef1a2f944bafe37738c6a56fde51 Mon Sep 17 00:00:00 2001
From: visuallization <florentin.rieger@gmail.com>
Date: Fri, 28 Jul 2023 12:00:47 +0200
Subject: [PATCH 20/32] fix(test): also fix windows rllib pip install

---
 .github/workflows/test-ci.yml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/test-ci.yml b/.github/workflows/test-ci.yml
index a979d695..a3cb6c0b 100644
--- a/.github/workflows/test-ci.yml
+++ b/.github/workflows/test-ci.yml
@@ -76,7 +76,6 @@ jobs:
         pip install .[test]
     - name: Clean up dependencies
       run: |
-        python -m pip install --upgrade pip
         pip uninstall -y stable-baselines3 gymnasium
         pip install .[rllib]
     - name: Download examples
@@ -100,11 +99,13 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip
+        python -m pip install --upgrade pip wheel==0.38.4
         # cpu version of pytorch
         pip install .[test]
+    - name: Clean up dependencies
+      run: |
         pip uninstall -y stable-baselines3 gymnasium
-        pip install ray[rllib]
+        pip install .[rllib]
     - name: Download examples
       run: |
         make download_examples

From 0ef70b41c270c53ec15833e302478e78e89d7eee Mon Sep 17 00:00:00 2001
From: visuallization <florentin.rieger@gmail.com>
Date: Fri, 28 Jul 2023 14:24:09 +0200
Subject: [PATCH 21/32] feat(sb3): make it possible to set seed

---
 examples/stable_baselines3_example.py         | 8 +++++++-
 godot_rl/wrappers/stable_baselines_wrapper.py | 6 +++---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py
index c25de664..ce035c5b 100644
--- a/examples/stable_baselines3_example.py
+++ b/examples/stable_baselines3_example.py
@@ -36,6 +36,12 @@
     help="The name of the experiment, which will be displayed in tensorboard and "
          "for checkpoint directory and name (if enabled).",
 )
+parser.add_argument(
+    "--seed",
+    type=int,
+    default=0,
+    help="seed of the experiment"
+)
 parser.add_argument(
     "--resume_model_path",
     default=None,
@@ -107,7 +113,7 @@
 if args.env_path is None and args.viz:
     print("Info: Using --viz without --env_path set has no effect, in-editor training will always render.")
 
-env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=args.viz, n_parallel=args.n_parallel,
+env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=args.viz, seed=args.seed, n_parallel=args.n_parallel,
                               speedup=args.speedup)
 env = VecMonitor(env)
 
diff --git a/godot_rl/wrappers/stable_baselines_wrapper.py b/godot_rl/wrappers/stable_baselines_wrapper.py
index 2462b78e..fb723e3d 100644
--- a/godot_rl/wrappers/stable_baselines_wrapper.py
+++ b/godot_rl/wrappers/stable_baselines_wrapper.py
@@ -10,7 +10,7 @@
 
 
 class StableBaselinesGodotEnv(VecEnv):
-    def __init__(self, env_path: Optional[str] = None, n_parallel: int = 1, **kwargs) -> None:
+    def __init__(self, env_path: Optional[str] = None, n_parallel: int = 1, seed: int = 0, **kwargs) -> None:
         # If we are doing editor training, n_parallel must be 1
         if env_path is None and n_parallel > 1:
             raise ValueError("You must provide the path to a exported game executable if n_parallel > 1")
@@ -19,7 +19,7 @@ def __init__(self, env_path: Optional[str] = None, n_parallel: int = 1, **kwargs
         port = kwargs.pop("port", GodotEnv.DEFAULT_PORT)
 
         # Create a list of GodotEnv instances
-        self.envs = [GodotEnv(env_path=env_path, convert_action_space=True, port=port+p, seed=p, **kwargs) for p in range(n_parallel)]
+        self.envs = [GodotEnv(env_path=env_path, convert_action_space=True, port=port+p, seed=seed+p, **kwargs) for p in range(n_parallel)]
         
         # Store the number of parallel environments
         self.n_parallel = n_parallel
@@ -114,7 +114,7 @@ def get_attr(self, attr_name: str, indices = None) -> List[Any]:
             return [None for _ in range(self.num_envs)]
         raise AttributeError("get attr not fully implemented in godot-rl StableBaselinesWrapper")
 
-    def seed(self):
+    def seed(self, seed = None):
         raise NotImplementedError()
 
     def set_attr(self):

From 44ac81ef9763fac3dcb88c616b12a828a9512f0a Mon Sep 17 00:00:00 2001
From: visuallization <florentin.rieger@gmail.com>
Date: Fri, 28 Jul 2023 15:43:25 +0200
Subject: [PATCH 22/32] feat(sf): make it possible to set seed

---
 godot_rl/main.py                            |  1 +
 godot_rl/wrappers/sample_factory_wrapper.py | 13 +++++++------
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/godot_rl/main.py b/godot_rl/main.py
index be20f8ba..f856e673 100644
--- a/godot_rl/main.py
+++ b/godot_rl/main.py
@@ -62,6 +62,7 @@ def get_args():
     parser.add_argument("--experiment_dir", default=None, type=str, help="The name of the the experiment directory, in which the tensorboard logs are getting stored")
     parser.add_argument("--experiment_name", default="experiment", type=str, help="The name of the the experiment, which will be displayed in tensborboard")
     parser.add_argument("--viz", default=False, action="store_true", help="Whether to visualize one process")
+    parser.add_argument("--seed", default=0, type=int, help="seed of the experiment")
     
     args, extras =  parser.parse_known_args()
     if args.experiment_dir is None:
diff --git a/godot_rl/wrappers/sample_factory_wrapper.py b/godot_rl/wrappers/sample_factory_wrapper.py
index a5da8e7e..4b164f2c 100644
--- a/godot_rl/wrappers/sample_factory_wrapper.py
+++ b/godot_rl/wrappers/sample_factory_wrapper.py
@@ -72,32 +72,32 @@ def render():
         return
 
 
-def make_godot_env_func(env_path, full_env_name, cfg=None, env_config=None, render_mode=None, speedup=1, viz=False):
-    seed = 0
+def make_godot_env_func(env_path, full_env_name, cfg=None, env_config=None, render_mode=None, seed=0, speedup=1, viz=False):
     port = cfg.base_port
     print("BASE PORT ", cfg.base_port)
     show_window = False
+    _seed = seed
     if env_config:
         port += 1 + env_config.env_id
-        seed += 1 + env_config.env_id
+        _seed += 1 + env_config.env_id
         print("env id", env_config.env_id)
         if viz:  #
             print("creating viz env")
             show_window = env_config.env_id == 0
     if cfg.batched_sampling:
         env = SampleFactoryEnvWrapperBatched(
-            env_path=env_path, port=port, seed=seed, show_window=show_window, speedup=speedup
+            env_path=env_path, port=port, seed=_seed, show_window=show_window, speedup=speedup
         )
     else:
         env = SampleFactoryEnvWrapperNonBatched(
-            env_path=env_path, port=port, seed=seed, show_window=show_window, speedup=speedup
+            env_path=env_path, port=port, seed=_seed, show_window=show_window, speedup=speedup
         )
 
     return env
 
 
 def register_gdrl_env(args):
-    make_env = partial(make_godot_env_func, args.env_path, speedup=args.speedup, viz=args.viz)
+    make_env = partial(make_godot_env_func, args.env_path, speedup=args.speedup, seed=args.seed, viz=args.viz)
     register_env("gdrl", make_env)
 
 
@@ -152,6 +152,7 @@ def add_gdrl_env_args(_env, p: argparse.ArgumentParser, evaluation=False):
         # apparently env.render(mode="human") is not supported anymore and we need to specify the render mode in
         # the env actor
         p.add_argument("--render_mode", default="human", type=str, help="")
+
     p.add_argument("--base_port", default=GodotEnv.DEFAULT_PORT, type=int, help="")
 
     p.add_argument(

From 4a1a0161abbd15691e0aa10d06595032b44749f0 Mon Sep 17 00:00:00 2001
From: visuallization <florentin.rieger@gmail.com>
Date: Fri, 28 Jul 2023 16:11:00 +0200
Subject: [PATCH 23/32] fix(sf): make experiment_dir work again

---
 godot_rl/wrappers/sample_factory_wrapper.py | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/godot_rl/wrappers/sample_factory_wrapper.py b/godot_rl/wrappers/sample_factory_wrapper.py
index 4b164f2c..c38f8b44 100644
--- a/godot_rl/wrappers/sample_factory_wrapper.py
+++ b/godot_rl/wrappers/sample_factory_wrapper.py
@@ -161,26 +161,14 @@ def add_gdrl_env_args(_env, p: argparse.ArgumentParser, evaluation=False):
         type=int,
         help="Num agents in each envpool (if used)",
     )
-    p.add_argument(
-        "--experiment_dir",
-        default="logs/sf",
-        type=str,
-        help="The name of the experiment directory, in which the tensorboard logs are getting stored",
-    )
-    p.add_argument(
-        "--experiment_name",
-        default=None,
-        type=str,
-        help="The name of the experiment, which will be displayed in tensorboard",
-    )
 
 
-def parse_gdrl_args(argv=None, evaluation=False):
+def parse_gdrl_args(args, argv=None, evaluation=False):
     parser, partial_cfg = parse_sf_args(argv=argv, evaluation=evaluation)
     add_gdrl_env_args(partial_cfg.env, parser, evaluation=evaluation)
     gdrl_override_defaults(partial_cfg.env, parser)
     final_cfg = parse_full_cfg(parser, argv)
-    args, _ = parser.parse_known_args(argv)
+    
     final_cfg.train_dir = args.experiment_dir or "logs/sf"
     final_cfg.experiment = args.experiment_name or final_cfg.experiment
     return final_cfg
@@ -188,7 +176,7 @@ def parse_gdrl_args(argv=None, evaluation=False):
 
 def sample_factory_training(args, extras):
     register_gdrl_env(args)
-    cfg = parse_gdrl_args(argv=extras, evaluation=args.eval)
+    cfg = parse_gdrl_args(args=args, argv=extras, evaluation=args.eval)
     #cfg.base_port = random.randint(20000, 22000)
     status = run_rl(cfg)
     return status
@@ -196,7 +184,7 @@ def sample_factory_training(args, extras):
 
 def sample_factory_enjoy(args, extras):
     register_gdrl_env(args)
-    cfg = parse_gdrl_args(argv=extras, evaluation=args.eval)
+    cfg = parse_gdrl_args(args=args, argv=extras, evaluation=args.eval)
 
     status = enjoy(cfg)
     return status

From 69f14b5ce969f433f36023a42ce8da483c63d9cf Mon Sep 17 00:00:00 2001
From: visuallization <florentin.rieger@gmail.com>
Date: Fri, 28 Jul 2023 16:49:16 +0200
Subject: [PATCH 24/32] feat(rllib): make it possible to set a seed

---
 godot_rl/wrappers/ray_wrapper.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/godot_rl/wrappers/ray_wrapper.py b/godot_rl/wrappers/ray_wrapper.py
index 9c4caffe..01a6c195 100644
--- a/godot_rl/wrappers/ray_wrapper.py
+++ b/godot_rl/wrappers/ray_wrapper.py
@@ -126,6 +126,8 @@ def rllib_training(args, extras):
     register_env()
 
     exp["config"]["env_config"]["env_path"] = args.env_path
+    exp["config"]["env_config"]["seed"] = args.seed
+
     if args.env_path is not None:
         run_name = exp["algorithm"] + "/" + pathlib.Path(args.env_path).stem
     else:

From fc20ae4951e188629ff45d4e16e3a780dc54560d Mon Sep 17 00:00:00 2001
From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com>
Date: Fri, 28 Jul 2023 17:38:07 +0200
Subject: [PATCH 25/32] 1 worker, even fewer timesteps

Just checking if it makes a difference
---
 tests/fixtures/test_rllib.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/fixtures/test_rllib.yaml b/tests/fixtures/test_rllib.yaml
index 955c6449..3c237b3c 100644
--- a/tests/fixtures/test_rllib.yaml
+++ b/tests/fixtures/test_rllib.yaml
@@ -4,7 +4,7 @@ algorithm: PPO
 stop:
     episode_reward_mean: 5000
     training_iteration: 1000
-    timesteps_total: 200
+    timesteps_total: 100
 
 config:
     env: godot
@@ -24,7 +24,7 @@ config:
     train_batch_size: 1024
     sgd_minibatch_size: 128
     num_sgd_iter: 16
-    num_workers: 4
+    num_workers: 1
     lr: 0.0003
     num_envs_per_worker: 16
     batch_mode: truncate_episodes

From 776258cab44c77459f071abb11113eafa0a123ff Mon Sep 17 00:00:00 2001
From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com>
Date: Sat, 29 Jul 2023 00:00:02 +0200
Subject: [PATCH 26/32] Attempting to add seed arg

---
 godot_rl/wrappers/sample_factory_wrapper.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/godot_rl/wrappers/sample_factory_wrapper.py b/godot_rl/wrappers/sample_factory_wrapper.py
index c38f8b44..c9f6c224 100644
--- a/godot_rl/wrappers/sample_factory_wrapper.py
+++ b/godot_rl/wrappers/sample_factory_wrapper.py
@@ -155,6 +155,8 @@ def add_gdrl_env_args(_env, p: argparse.ArgumentParser, evaluation=False):
 
     p.add_argument("--base_port", default=GodotEnv.DEFAULT_PORT, type=int, help="")
 
+    p.add_argument("--seed", default=0, type=int, help="")
+
     p.add_argument(
         "--env_agents",
         default=2,

From ccb7b7ff02d0ce23acc5e1ca4584af0267dc2006 Mon Sep 17 00:00:00 2001
From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com>
Date: Sat, 29 Jul 2023 00:18:47 +0200
Subject: [PATCH 27/32] reverting change (didn't fix the issue)

---
 godot_rl/wrappers/sample_factory_wrapper.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/godot_rl/wrappers/sample_factory_wrapper.py b/godot_rl/wrappers/sample_factory_wrapper.py
index c9f6c224..c38f8b44 100644
--- a/godot_rl/wrappers/sample_factory_wrapper.py
+++ b/godot_rl/wrappers/sample_factory_wrapper.py
@@ -155,8 +155,6 @@ def add_gdrl_env_args(_env, p: argparse.ArgumentParser, evaluation=False):
 
     p.add_argument("--base_port", default=GodotEnv.DEFAULT_PORT, type=int, help="")
 
-    p.add_argument("--seed", default=0, type=int, help="")
-
     p.add_argument(
         "--env_agents",
         default=2,

From 37edec85e2535ba17b1b40b462f42dc4418e6e3d Mon Sep 17 00:00:00 2001
From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com>
Date: Sat, 29 Jul 2023 00:31:42 +0200
Subject: [PATCH 28/32] Another attempt to add seed

---
 examples/sample_factory_example.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/sample_factory_example.py b/examples/sample_factory_example.py
index 8c30aa8a..1fc5abec 100644
--- a/examples/sample_factory_example.py
+++ b/examples/sample_factory_example.py
@@ -7,7 +7,8 @@ def get_args():
     parser.add_argument("--env_path", default=None, type=str, help="Godot binary to use")
     parser.add_argument("--eval", default=False, action="store_true", help="whether to eval the model")
     parser.add_argument("--speedup", default=1, type=int, help="whether to speed up the physics in the env")
-    parser.add_argument("--export", default=False, action="store_true", help="wheter to export the model")
+    parser.add_argument("--seed", default=0, type=int, help="environment seed")
+    parser.add_argument("--export", default=False, action="store_true", help="whether to export the model")
     parser.add_argument("--viz", default=False, action="store_true", help="Whether to visualize one process")
 
     return parser.parse_known_args()
@@ -23,4 +24,4 @@ def main():
         
         
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()

From e615641574e9d10d580ebf5bbae08b25963a510a Mon Sep 17 00:00:00 2001
From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com>
Date: Sat, 29 Jul 2023 00:43:04 +0200
Subject: [PATCH 29/32] Add --experiment_dir

---
 examples/sample_factory_example.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/sample_factory_example.py b/examples/sample_factory_example.py
index 1fc5abec..462bfeda 100644
--- a/examples/sample_factory_example.py
+++ b/examples/sample_factory_example.py
@@ -10,6 +10,8 @@ def get_args():
     parser.add_argument("--seed", default=0, type=int, help="environment seed")
     parser.add_argument("--export", default=False, action="store_true", help="whether to export the model")
     parser.add_argument("--viz", default=False, action="store_true", help="Whether to visualize one process")
+    parser.add_argument("--experiment_dir", default="logs/sf", type=str,
+    help="The name of the experiment directory, in which the tensorboard logs are getting stored")
 
     return parser.parse_known_args()
 

From 648a152b8231e0d96158d5ce91c0b88067883faa Mon Sep 17 00:00:00 2001
From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com>
Date: Sat, 29 Jul 2023 00:45:48 +0200
Subject: [PATCH 30/32] Add --experiment_name

---
 examples/sample_factory_example.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/examples/sample_factory_example.py b/examples/sample_factory_example.py
index 462bfeda..2c4e10a6 100644
--- a/examples/sample_factory_example.py
+++ b/examples/sample_factory_example.py
@@ -12,6 +12,12 @@ def get_args():
     parser.add_argument("--viz", default=False, action="store_true", help="Whether to visualize one process")
     parser.add_argument("--experiment_dir", default="logs/sf", type=str,
     help="The name of the experiment directory, in which the tensorboard logs are getting stored")
+    parser.add_argument(
+        "--experiment_name",
+        default="experiment",
+        type=str,
+        help="The name of the experiment, which will be displayed in tensorboard. ",
+    )
 
     return parser.parse_known_args()
 

From d7488afb20f18539c9b27003b3e86ad142159f39 Mon Sep 17 00:00:00 2001
From: Edward Beeching <edbeeching@gmail.com>
Date: Mon, 31 Jul 2023 10:33:17 +0200
Subject: [PATCH 31/32] updates installation instructions for rllib

---
 docs/ADV_RLLIB.md | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/docs/ADV_RLLIB.md b/docs/ADV_RLLIB.md
index 9a8cd388..fd5040a6 100644
--- a/docs/ADV_RLLIB.md
+++ b/docs/ADV_RLLIB.md
@@ -5,10 +5,13 @@
 ## Installation
 
 If you want to train with rllib, create a new environment e.g.: `python -m venv venv.rllib` as rllib's dependencies can conflict with those of sb3 and other libraries.
-
+Due to a version clash with gymnasium, stable-baselines3 much be uninstalled before installing rllib.
 ```bash
-# remove sb3 installation with pip uninstall godot-rl[sb3]
-pip install godot-rl[rllib]
+pip install godot-rl
+# remove sb3 and gymnasium installations
+pip uninstall -y stable-baselines3 gymnasium
+# install rllib
+pip install ray[rllib]
 ```
 
 ## Basic Environment Usage

From 8b21238ef3a55b1a5111695da011b29c3ffd2e43 Mon Sep 17 00:00:00 2001
From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com>
Date: Mon, 31 Jul 2023 15:58:46 +0200
Subject: [PATCH 32/32] Update ADV_RLLIB.md

fix typo
---
 docs/ADV_RLLIB.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ADV_RLLIB.md b/docs/ADV_RLLIB.md
index fd5040a6..0ae95ee5 100644
--- a/docs/ADV_RLLIB.md
+++ b/docs/ADV_RLLIB.md
@@ -5,7 +5,7 @@
 ## Installation
 
 If you want to train with rllib, create a new environment e.g.: `python -m venv venv.rllib` as rllib's dependencies can conflict with those of sb3 and other libraries.
-Due to a version clash with gymnasium, stable-baselines3 much be uninstalled before installing rllib.
+Due to a version clash with gymnasium, stable-baselines3 must be uninstalled before installing rllib.
 ```bash
 pip install godot-rl
 # remove sb3 and gymnasium installations