diff --git a/slides.html b/slides.html
index 96a7f64..767a6ce 100644
--- a/slides.html
+++ b/slides.html
@@ -7769,8 +7769,9 @@

Environment

@@ -7960,7 +7961,7 @@

Set a target beam you want to achieve

env.target_beam_values = target_beam
 env.reset()  ##
-plt.figure(figsize = (7, 4))
+plt.figure(figsize=(7, 4))
 plt.imshow(env.render())  # Plot the screen image
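
For reference, the target beam used above is a four-component vector; a minimal illustrative sketch of how it could be defined, assuming the $[\mu_x, \sigma_x, \mu_y, \sigma_y]$ ordering introduced with the reward definitions below (the concrete values here are made up, not taken from the notebook):

import numpy as np

# Hypothetical target: centre the beam and make it small (values purely illustrative)
target_beam = np.array([0.0, 1e-4, 0.0, 1e-4])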
 
@@ -8035,7 +8036,7 @@

Get familiar with the Gym environment

env = RescaleAction(env, -1, 1)  # rescales the action to the interval [-1, 1]
 env.reset()
 env.step(action)
-plt.figure(figsize = (7, 4))
+plt.figure(figsize=(7, 4))
 plt.imshow(env.render())
 
@@ -8096,16 +8097,17 @@

Get familiar with the Gym environment

env.reset()
 steps = 10
 
+
 def change_vertical_corrector(q1, q2, cv, q3, ch, steps, i):
     action = np.array([q1, q2, cv + 1 / steps * i, q3, ch])
     return action
 
 
-fig, ax = plt.subplots(1, figsize = (7, 4))
+fig, ax = plt.subplots(1, figsize=(7, 4))
 for i in range(steps):
     action = change_vertical_corrector(0.2, -0.2, -0.5, 0.3, 0, steps, i)
     env.step(action)
-    
+
     img = env.render()
     ax.imshow(img)
     display(fig)
@@ -8250,8 +8252,8 @@ 

Relevant config parameters

Reward = objective_improvement

Difference of the objective:

$$ r_\mathrm{obj-improvement} = ( \mathrm{obj}_{j-1} - \mathrm{obj}_{j} ) / \mathrm{obj}_0 $$

$$ \mathrm{obj} = \sum_{i}|b_i^\mathrm{(c)} - b_i^\mathrm{(t)}| $$

where $j$ is the index of the current time step.
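
A minimal sketch of this reward in plain NumPy, with hypothetical function and variable names (current_beam, target_beam, obj_previous, ...) that are not the environment's actual attributes:

import numpy as np

def objective(current_beam, target_beam):
    # obj = sum_i |b_i^(c) - b_i^(t)| over [mu_x, sigma_x, mu_y, sigma_y]
    return np.sum(np.abs(current_beam - target_beam))

def objective_improvement_reward(obj_previous, obj_current, obj_initial):
    # r = (obj_{j-1} - obj_j) / obj_0: positive whenever the last step reduced the objective
    return (obj_previous - obj_current) / obj_initial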

@@ -8290,7 +8292,7 @@

Question

 while not (terminated or truncated):
     action, _ = loaded_model.predict(observation)
     observation, reward, terminated, truncated, info = env.step(action)
-
+
     img = env.render()
     ax.imshow(img)
     display(fig)

@@ -8369,7 +8371,7 @@

Question

 while not (terminated or truncated):
     action, _ = loaded_model.predict(observation)
     observation, reward, terminated, truncated, info = env.step(action)
-
+
     img = env.render()
     ax.imshow(img)
     display(fig)

@@ -8412,7 +8414,7 @@

Relevant config parameters

Reward = objective_improvement

Difference of the objective:

$$ r_\mathrm{obj-improvement} = ( \mathrm{obj}_{j-1} - \mathrm{obj}_{j} ) / \mathrm{obj}_0 $$

$$ \mathrm{obj} = \sum_{i}|b_i^\mathrm{(c)} - b_i^\mathrm{(t)}| $$

where $j$ is the index of the current time step.

@@ -8453,7 +8455,7 @@

Question

 while not (terminated or truncated):
     action, _ = loaded_model.predict(observation)
     observation, reward, terminated, truncated, info = env.step(action)
-
+
     img = env.render()
     ax.imshow(img)
     display(fig)

@@ -8496,7 +8498,7 @@

Relevant config parameters

Reward = objective_improvement

Difference of the objective:

$$ r_\mathrm{obj-improvement} = ( \mathrm{obj}_{j-1} - \mathrm{obj}_{j} ) / \mathrm{obj}_0 $$

$$ \mathrm{obj} = \sum_{i}|b_i^\mathrm{(c)} - b_i^\mathrm{(t)}| $$

where $j$ is the index of the current time step.

@@ -8537,7 +8539,7 @@

Question

 while not (terminated or truncated):
     action, _ = loaded_model.predict(observation)
     observation, reward, terminated, truncated, info = env.step(action)
-
+
     img = env.render()
     ax.imshow(img)
     display(fig)

@@ -8580,7 +8582,7 @@

Relevant config parameters

Reward = negative_objective

$$ \mathrm{obj} = \sum_{i}|b_i^\mathrm{(c)} - b_i^\mathrm{(t)}| $$

$$ r_\mathrm{neg-obj} = -1 * \mathrm{obj} / \mathrm{obj}_0 $$

where $b = [\mu_x,\sigma_x,\mu_y,\sigma_y]$, $b^\mathrm{(c)}$ is the current beam, and $b^\mathrm{(t)}$ is the target beam. $\mathrm{obj}_0$ is the initial objective after reset.
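
The same kind of sketch for this variant, again with hypothetical names rather than the environment's real attributes:

import numpy as np

def negative_objective_reward(current_beam, target_beam, obj_initial):
    # r = -obj / obj_0; the reward reaches 0 only when the current beam matches the target exactly
    obj = np.sum(np.abs(current_beam - target_beam))
    return -obj / obj_initial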

@@ -8619,7 +8621,7 @@

Question

 while not (terminated or truncated):
     action, _ = loaded_model.predict(observation)
     observation, reward, terminated, truncated, info = env.step(action)
-
+
     img = env.render()
     ax.imshow(img)
     display(fig)

@@ -8973,7 +8975,7 @@

Agent evaluation

In [26]:

-plt.figure(figsize = (7,4))
+plt.figure(figsize=(7, 4))
 evaluate_ares_ea_agent(agent_under_investigation, include_position=False, n=2000)
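
evaluate_ares_ea_agent is defined elsewhere in the notebook; purely as a sketch of what such an evaluation loop could look like, assuming a Gymnasium-style env and a Stable-Baselines3-style predict (the real helper may compute different statistics):

import numpy as np

def evaluate_agent(model, env, n=2000):
    # Roll out n episodes and report the mean episode return
    returns = []
    for _ in range(n):
        observation, info = env.reset()
        terminated = truncated = False
        episode_return = 0.0
        while not (terminated or truncated):
            action, _ = model.predict(observation, deterministic=True)
            observation, reward, terminated, truncated, info = env.step(action)
            episode_return += reward
        returns.append(episode_return)
    return np.mean(returns)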
 
@@ -9037,7 +9039,7 @@

Agent evaluation

 while not done:
     action, _ = loaded_model.predict(observation)
     observation, reward, done, info = env.step(action)
-
+
     img = env.render(mode="rgb_array")
     ax.imshow(img)
     display(fig)