From 25c95149b316d7281cddb2f15ef0bba634d441de Mon Sep 17 00:00:00 2001 From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com> Date: Fri, 21 Jul 2023 13:55:52 +0200 Subject: [PATCH 01/10] Adds --resume_model_path and --save_model_path --- examples/stable_baselines3_example.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py index d13a85b9..b385dd9c 100644 --- a/examples/stable_baselines3_example.py +++ b/examples/stable_baselines3_example.py @@ -1,4 +1,6 @@ import argparse +import os +import pathlib from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv from godot_rl.wrappers.onnx.stable_baselines_export import export_ppo_model_as_onnx @@ -29,6 +31,20 @@ type=str, help="The name of the experiment, which will be displayed in tensorboard", ) +parser.add_argument( + "--resume_model_path", + default=None, + type=str, + help="The path to a model file previously saved using --save_model_path or a checkpoint saved using " + "--save_checkpoints_frequency. Use this to resume training from a saved model.", +) +parser.add_argument( + "--save_model_path", + default=None, + type=str, + help="The path to use for saving the trained sb3 model after training is complete. Saved model can be used later " + "to resume training. 
Extension will be set to .zip", +) parser.add_argument( "--onnx_export_path", default=None, @@ -44,7 +60,13 @@ env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=True, n_parallel=args.n_parallel, speedup=args.speedup) env = VecMonitor(env) -model = PPO("MultiInputPolicy", env, ent_coef=0.0001, verbose=2, n_steps=32, tensorboard_log=args.experiment_dir) +if args.resume_model_path is None: + model = PPO("MultiInputPolicy", env, ent_coef=0.0001, verbose=2, n_steps=32, tensorboard_log=args.experiment_dir) +else: + path_zip = pathlib.Path(args.resume_model_path) + print("Loading model: " + os.path.abspath(path_zip)) + model = PPO.load(path_zip, env=env) + model.learn(1000000, tb_log_name=args.experiment_name) print("closing env") From 10d8130c492481ef0e4d5e564c3235a7c242051e Mon Sep 17 00:00:00 2001 From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com> Date: Fri, 21 Jul 2023 14:23:56 +0200 Subject: [PATCH 02/10] Adds timesteps and implements saving --- examples/stable_baselines3_example.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py index b385dd9c..c5e444c1 100644 --- a/examples/stable_baselines3_example.py +++ b/examples/stable_baselines3_example.py @@ -51,12 +51,19 @@ type=str, help="The Godot binary to use, do not include for in editor training", ) - +parser.add_argument( + "--timesteps", + default=1_000_000, + type=int, + help="The number of environment steps to train for, default is 1_000_000. 
If resuming from a saved model, " + "it will continue training for this amount of steps from the saved state without counting previously trained " + "steps", +) parser.add_argument("--speedup", default=1, type=int, help="Whether to speed up the physics in the env") -parser.add_argument("--n_parallel", default=1, type=int, help="How many instances of the environment executable to launch - requires --env_path to be set if > 1.") +parser.add_argument("--n_parallel", default=1, type=int, help="How many instances of the environment executable to " + "launch - requires --env_path to be set if > 1.") args, extras = parser.parse_known_args() - env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=True, n_parallel=args.n_parallel, speedup=args.speedup) env = VecMonitor(env) @@ -67,10 +74,19 @@ print("Loading model: " + os.path.abspath(path_zip)) model = PPO.load(path_zip, env=env) -model.learn(1000000, tb_log_name=args.experiment_name) +model.learn(args.timesteps, tb_log_name=args.experiment_name) print("closing env") env.close() +# Enforce the extension of onnx and zip when saving model to avoid potential conflicts in case of same name +# and extension used for both if args.onnx_export_path is not None: - export_ppo_model_as_onnx(model, args.onnx_export_path) + path_onnx = pathlib.Path(args.onnx_export_path).with_suffix(".onnx") + print("Exporting onnx to: " + os.path.abspath(path_onnx)) + export_ppo_model_as_onnx(model, str(path_onnx)) + +if args.save_model_path is not None: + path_zip = pathlib.Path(args.save_model_path).with_suffix(".zip") + print("Saving model to: " + os.path.abspath(path_zip)) + model.save(path_zip) From 7b8669254fb8e0d9b48f4f65128bc1a7b8e9a66b Mon Sep 17 00:00:00 2001 From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com> Date: Fri, 21 Jul 2023 15:31:51 +0200 Subject: [PATCH 03/10] Adds auto-checkpoint saving and inference --- examples/stable_baselines3_example.py | 50 ++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 4 
deletions(-) diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py index c5e444c1..b6e1a300 100644 --- a/examples/stable_baselines3_example.py +++ b/examples/stable_baselines3_example.py @@ -2,6 +2,7 @@ import os import pathlib +from stable_baselines3.common.callbacks import CheckpointCallback from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv from godot_rl.wrappers.onnx.stable_baselines_export import export_ppo_model_as_onnx from stable_baselines3 import PPO @@ -23,13 +24,14 @@ "--experiment_dir", default="logs/sb3", type=str, - help="The name of the experiment directory, in which the tensorboard logs are getting stored", + help="The name of the experiment directory, in which the tensorboard logs and checkpoints are getting stored." ) parser.add_argument( "--experiment_name", default="Experiment", type=str, - help="The name of the experiment, which will be displayed in tensorboard", + help="The name of the experiment, which will be displayed in tensorboard and " + "for checkpoint directory and name.", ) parser.add_argument( "--resume_model_path", @@ -45,6 +47,14 @@ help="The path to use for saving the trained sb3 model after training is complete. Saved model can be used later " "to resume training. Extension will be set to .zip", ) +parser.add_argument( + "--save_checkpoint_frequency", + default=None, + type=int, + help=("If set, will save checkpoints every 'frequency' environment steps. " + "Requires a unique --experiment_name or --experiment_dir for each run. " + "Does not need --save_model_path to be set. "), +) parser.add_argument( "--onnx_export_path", default=None, @@ -59,11 +69,27 @@ "it will continue training for this amount of steps from the saved state without counting previously trained " "steps", ) +parser.add_argument( + "--inference", + action="store_true", + help="Instead of training, it will run inference on a loaded model for --timesteps steps. 
" + "Requires --resume_model_path to be set." +) parser.add_argument("--speedup", default=1, type=int, help="Whether to speed up the physics in the env") parser.add_argument("--n_parallel", default=1, type=int, help="How many instances of the environment executable to " "launch - requires --env_path to be set if > 1.") args, extras = parser.parse_known_args() +path_checkpoint = os.path.join(args.experiment_dir, args.experiment_name + "_checkpoints") +abs_path_checkpoint = os.path.abspath(path_checkpoint) + +# Prevent overwriting existing checkpoints when starting a new experiment if checkpoint saving is enabled +if args.save_checkpoint_frequency is not None and os.path.isdir(path_checkpoint): + raise RuntimeError(abs_path_checkpoint + " folder already exists. " + "Use a different --experiment_dir, or --experiment_name," + "or if previous checkpoints are not needed anymore, " + "remove the folder containing the checkpoints. ") + env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=True, n_parallel=args.n_parallel, speedup=args.speedup) env = VecMonitor(env) @@ -72,9 +98,25 @@ else: path_zip = pathlib.Path(args.resume_model_path) print("Loading model: " + os.path.abspath(path_zip)) - model = PPO.load(path_zip, env=env) + model = PPO.load(path_zip, env=env, tensorboard_log=args.experiment_dir) + print(model.ent_coef) -model.learn(args.timesteps, tb_log_name=args.experiment_name) +if args.inference: + obs = env.reset() + for i in range(args.timesteps): + action, _state = model.predict(obs, deterministic=True) + obs, reward, done, info = env.step(action) +else: + if args.save_checkpoint_frequency is None: + model.learn(args.timesteps, tb_log_name=args.experiment_name) + else: + print("Checkpoint saving enabled. 
Checkpoints will be saved to: " + abs_path_checkpoint) + checkpoint_callback = CheckpointCallback( + save_freq=(args.save_checkpoint_frequency // env.num_envs), + save_path=path_checkpoint, + name_prefix=args.experiment_name + ) + model.learn(args.timesteps, callback=checkpoint_callback, tb_log_name=args.experiment_name) print("closing env") env.close() From 602147b232671442dbcfc3c3dd39c46d3ef5a95e Mon Sep 17 00:00:00 2001 From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com> Date: Fri, 21 Jul 2023 15:45:55 +0200 Subject: [PATCH 04/10] CL args help text update --- examples/stable_baselines3_example.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py index b6e1a300..b478e04c 100644 --- a/examples/stable_baselines3_example.py +++ b/examples/stable_baselines3_example.py @@ -24,14 +24,14 @@ "--experiment_dir", default="logs/sb3", type=str, - help="The name of the experiment directory, in which the tensorboard logs and checkpoints are getting stored." + help="The name of the experiment directory, in which the tensorboard logs and checkpoints (if enabled) are getting stored." 
) parser.add_argument( "--experiment_name", default="Experiment", type=str, help="The name of the experiment, which will be displayed in tensorboard and " - "for checkpoint directory and name.", + "for checkpoint directory and name (if enabled).", ) parser.add_argument( "--resume_model_path", From 7792893c1650f873b1abf6fd3586190c1c362028 Mon Sep 17 00:00:00 2001 From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com> Date: Fri, 21 Jul 2023 16:12:40 +0200 Subject: [PATCH 05/10] Added error message when using inference without resume_model_path --- examples/stable_baselines3_example.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py index b478e04c..a8b16ade 100644 --- a/examples/stable_baselines3_example.py +++ b/examples/stable_baselines3_example.py @@ -90,7 +90,11 @@ "or if previous checkpoints are not needed anymore, " "remove the folder containing the checkpoints. ") -env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=True, n_parallel=args.n_parallel, speedup=args.speedup) +if args.inference and args.resume_model_path is None: + raise parser.error("Using --inference requires --resume_model_path to be set.") + +env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=True, n_parallel=args.n_parallel, + speedup=args.speedup) env = VecMonitor(env) if args.resume_model_path is None: From e6e621449270f17fe80f2e4a31f3e80033d00bd1 Mon Sep 17 00:00:00 2001 From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com> Date: Sat, 22 Jul 2023 21:24:09 +0200 Subject: [PATCH 06/10] Removes a left-over print from testing --- examples/stable_baselines3_example.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py index a8b16ade..3177ce6d 100644 --- a/examples/stable_baselines3_example.py +++ b/examples/stable_baselines3_example.py @@ -103,7 +103,6 @@ path_zip = 
pathlib.Path(args.resume_model_path) print("Loading model: " + os.path.abspath(path_zip)) model = PPO.load(path_zip, env=env, tensorboard_log=args.experiment_dir) - print(model.ent_coef) if args.inference: obs = env.reset() From 537fdeecf29d50c9580dab1a7974b8b98be7958a Mon Sep 17 00:00:00 2001 From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com> Date: Sun, 23 Jul 2023 14:40:00 +0200 Subject: [PATCH 07/10] Adds infer to resume training description Co-authored-by: Florentin Luca Rieger --- examples/stable_baselines3_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py index 3177ce6d..bd2d9ea5 100644 --- a/examples/stable_baselines3_example.py +++ b/examples/stable_baselines3_example.py @@ -38,7 +38,7 @@ default=None, type=str, help="The path to a model file previously saved using --save_model_path or a checkpoint saved using " - "--save_checkpoints_frequency. Use this to resume training from a saved model.", + "--save_checkpoint_frequency.
Use this to resume training or infer from a saved model.", ) parser.add_argument( "--save_model_path", From 91c4e01c34691750994d7687ab99e1b40a2c6fd2 Mon Sep 17 00:00:00 2001 From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com> Date: Sun, 23 Jul 2023 14:58:37 +0200 Subject: [PATCH 08/10] Default experiment name changed to lowercase --- examples/stable_baselines3_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py index bd2d9ea5..5c82ffe8 100644 --- a/examples/stable_baselines3_example.py +++ b/examples/stable_baselines3_example.py @@ -28,7 +28,7 @@ ) parser.add_argument( "--experiment_name", - default="Experiment", + default="experiment", type=str, help="The name of the experiment, which will be displayed in tensorboard and " "for checkpoint directory and name (if enabled).", From 52651e0e7ce026e03f6fc021fdab883dd782faca Mon Sep 17 00:00:00 2001 From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com> Date: Sun, 23 Jul 2023 15:21:17 +0200 Subject: [PATCH 09/10] Adds --viz argument for changing rendering mode --- examples/stable_baselines3_example.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py index 5c82ffe8..c4f1aed9 100644 --- a/examples/stable_baselines3_example.py +++ b/examples/stable_baselines3_example.py @@ -24,7 +24,8 @@ "--experiment_dir", default="logs/sb3", type=str, - help="The name of the experiment directory, in which the tensorboard logs and checkpoints (if enabled) are getting stored." + help="The name of the experiment directory, in which the tensorboard logs and checkpoints (if enabled) are " + "getting stored." ) parser.add_argument( "--experiment_name", @@ -75,6 +76,13 @@ help="Instead of training, it will run inference on a loaded model for --timesteps steps. " "Requires --resume_model_path to be set." 
) +parser.add_argument( + "--viz", + action="store_true", + help="If set, the window(s) with the Godot environment(s) will be displayed, otherwise " + "training will run without rendering the game. Does not apply to in-editor training.", + default=False +) parser.add_argument("--speedup", default=1, type=int, help="Whether to speed up the physics in the env") parser.add_argument("--n_parallel", default=1, type=int, help="How many instances of the environment executable to " "launch - requires --env_path to be set if > 1.") @@ -93,7 +101,10 @@ if args.inference and args.resume_model_path is None: raise parser.error("Using --inference requires --resume_model_path to be set.") -env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=True, n_parallel=args.n_parallel, +if args.env_path is None and args.viz: + print("Info: Using --viz without --env_path set has no effect, in-editor training will always render.") + +env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=args.viz, n_parallel=args.n_parallel, speedup=args.speedup) env = VecMonitor(env) From 82bd742c1b30a184329548a17673b108098fe271 Mon Sep 17 00:00:00 2001 From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com> Date: Sun, 23 Jul 2023 15:26:03 +0200 Subject: [PATCH 10/10] Add default=False to inference Co-authored-by: Florentin Luca Rieger --- examples/stable_baselines3_example.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py index c4f1aed9..fd9c22fc 100644 --- a/examples/stable_baselines3_example.py +++ b/examples/stable_baselines3_example.py @@ -72,6 +72,7 @@ ) parser.add_argument( "--inference", + default=False, action="store_true", help="Instead of training, it will run inference on a loaded model for --timesteps steps. " "Requires --resume_model_path to be set."