diff --git a/api/env/index.html b/api/env/index.html index c7e1c319f..f1f20f8fa 100644 --- a/api/env/index.html +++ b/api/env/index.html @@ -2045,7 +2045,7 @@
__init__(self, env: Environment, reward_aggregator: Callable = <function sum at 0x7fd62457a790>, discount_aggregator: Callable = <function amax at 0x7fd62457af70>)
+__init__(self, env: Environment, reward_aggregator: Callable = <function sum at 0x7f2d8d729790>, discount_aggregator: Callable = <function amax at 0x7f2d8d729f70>)
special
@@ -2337,14 +2337,14 @@ Callable
a function to aggregate all agents' rewards into a single scalar value, e.g. sum.
<function sum at 0x7fd62457a790>
<function sum at 0x7f2d8d729790>
discount_aggregator
Callable
a function to aggregate all agents' discounts into a single scalar value, e.g. max.
<function amax at 0x7fd62457af70>
<function amax at 0x7f2d8d729f70>
jax.vmap
the wrapped environment (e.g. do not use with the VmapWrapper
),
which would lead to inefficient computation due to both the step
and reset
functions
being processed each time step
is called. Please use the VmapAutoResetWrapper
instead.
@@ -2729,6 +2730,61 @@ reset(self, key: chex.PRNGKey) -> Tuple[State, TimeStep[Observation]]
+
+
+#Resets the environment to an initial state.
+ +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
key |
+ chex.PRNGKey |
+ random key used to reset the environment. |
+ required | +
Returns:
+Type | +Description | +
---|---|
state |
+ State object corresponding to the new state of the environment, +timestep: TimeStep object corresponding to the first timestep returned by the environment, |
+