Commit

trailing whitespace
LukeHollingsworth committed Sep 3, 2024
1 parent cc0ad77 commit 709fc82
Showing 6 changed files with 67 additions and 63 deletions.
40 changes: 20 additions & 20 deletions examples/agent_examples/whittington_2020_example.ipynb

Large diffs are not rendered by default.

72 changes: 36 additions & 36 deletions examples/agent_examples/whittington_2020_run.py
@@ -14,7 +14,7 @@
from neuralplayground.experiments import Sargolini2006Data

# Set the location for saving the results of the simulation
-simulation_id = "TEM_var_walks"
+simulation_id = "TEM_test_5x5_small_walk"
save_path = os.path.join(os.getcwd(), simulation_id)
# save_path = os.path.join(os.getcwd(), "examples", "agent_examples", "trained_results")
agent_class = Whittington2020
@@ -26,48 +26,48 @@

# Set the x and y limits for the arena
arena_x_limits = [
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
]
arena_y_limits = [
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
-[-5, 5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
+[-2.5, 2.5],
]

# Set parameters for the environment that generates observations
discrete_env_params = {
"environment_name": "DiscreteObject",
"state_density": 1 / 2,
"state_density": 1,
"n_objects": params["n_x"],
"agent_step_size": 2, # Note: this must be 1 / state density
"agent_step_size": 1, # Note: this must be 1 / state density
"use_behavioural_data": False,
"data_path": None,
"experiment_class": Sargolini2006Data,
@@ -106,7 +106,7 @@
}

# Full model training consists of 20000 episodes
-training_loop_params = {"n_episode": 20000, "params": full_agent_params, "random_state": True, "custom_state": [0.0, 0.0]}
+training_loop_params = {"n_episode": 1000, "params": full_agent_params, "random_state": False, "custom_state": [0.0, 0.0]}

# Create the training simulation object
sim = SingleSim(
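Note on the parameter changes above: the inline comment requires agent_step_size to equal 1 / state_density, and the new pair (1 and 1) satisfies that for the smaller [-2.5, 2.5] arenas, just as the old pair (2 and 1/2) did for the [-5, 5] arenas. A minimal consistency-check sketch, assuming the discrete environment tiles each square arena at state_density states per unit length (variable names mirror the config keys above and are otherwise hypothetical):

    # Sketch: check the agent_step_size / state_density relationship noted above
    arena_x_limits = [-2.5, 2.5]
    arena_y_limits = [-2.5, 2.5]
    state_density = 1
    agent_step_size = 1
    assert agent_step_size == 1 / state_density, "agent_step_size must be 1 / state_density"
    # At this density the arena is assumed to discretise into a 5 x 5 grid of states
    n_states_x = int((arena_x_limits[1] - arena_x_limits[0]) * state_density)  # 5
    n_states_y = int((arena_y_limits[1] - arena_y_limits[0]) * state_density)  # 5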
6 changes: 3 additions & 3 deletions examples/agent_examples/whittington_slurm.sh
@@ -1,14 +1,14 @@
#!/bin/bash

-#SBATCH -J TEM_var_walks # job name
+#SBATCH -J TEM_5x5 # job name
#SBATCH -p gpu # partition (queue)
#SBATCH -N 1 # number of nodes
#SBATCH --mem 50G # memory pool for all cores
#SBATCH -n 4 # number of cores
#SBATCH -t 0-72:00 # time (D-HH:MM)
#SBATCH --gres gpu:1 # request 1 GPU (of any kind)
-#SBATCH -o TEM_var_walks.%x.%N.%j.out # STDOUT
-#SBATCH -e TEM_var_walks.%x.%N.%j.err # STDERR
+#SBATCH -o TEM_5x5.%x.%N.%j.out # STDOUT
+#SBATCH -e TEM_5x5.%x.%N.%j.err # STDERR

source ~/.bashrc

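For reference, the -o and -e directives name the STDOUT/STDERR files, and the %x.%N.%j placeholders expand to the job name, node name, and job ID, so the renamed job writes logs of the form TEM_5x5.TEM_5x5.<node>.<jobid>.out and .err. The script is submitted in the usual way, e.g. sbatch whittington_slurm.sh.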
7 changes: 5 additions & 2 deletions neuralplayground/agents/whittington_2020.py
@@ -214,7 +214,7 @@ def update(self):
Compute forward pass through model, updating weights, calculating TEM variables and collecting
losses / accuracies
"""
-# self.iter = int((len(self.obs_history) / 20) - 1)
+self.iter = int((len(self.obs_history) / 20) - 1)
self.global_steps += 1
history = self.obs_history[-self.pars["n_rollout"] :]
locations = [[{"id": env_step[0], "shiny": None} for env_step in step] for step in history]
@@ -256,6 +256,9 @@ def update(self):
self.final_model_input = model_input

forward = self.tem(model_input, self.prev_iter)
+chunk = [[step[0][0], np.argmax(step[1][0]), step[2][0]] for step in model_input]
+for i in range(len(chunk)):
+    self.logger.info(chunk[i])
# if self.prev_iter is None:
# with open('OG_log.txt', 'a') as f:
# f.write('Walk number: ' + str(self.global_steps) + '\n')
@@ -309,7 +312,7 @@ def update(self):
acc_p, acc_g, acc_gt = np.mean([[np.mean(a) for a in step.correct()] for step in forward], axis=0)
acc_p, acc_g, acc_gt = [a * 100 for a in (acc_p, acc_g, acc_gt)]
# Log progress
-if self.iter % 10 == 0:
+if self.iter % 1 == 0:
# Write series of messages to logger from this backprop iteration
self.logger.info("Finished backprop iter {:d} in {:.2f} seconds.".format(self.iter, time.time() - start_time))
self.logger.info(
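For context on the logging added in update() above: the comprehension pulls out, for the first batch entry, the location, the index of the one-hot observation, and the action at each rollout step. A rough, self-contained illustration, assuming each model_input step is a [locations, one-hot observations, actions] triple (all values below are hypothetical, including the n_x = 45 observation size):

    import numpy as np

    # Hypothetical single-step model_input with a batch of one environment
    step = [
        [{"id": 12, "shiny": None}],  # locations, one dict per batch entry
        np.eye(45)[[7]],              # one-hot observations, shape (batch, n_x)
        [2],                          # actions, one per batch entry
    ]
    model_input = [step]

    chunk = [[s[0][0], np.argmax(s[1][0]), s[2][0]] for s in model_input]
    # chunk -> [[{'id': 12, 'shiny': None}, 7, 2]]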
2 changes: 1 addition & 1 deletion neuralplayground/arenas/batch_environment.py
@@ -216,7 +216,7 @@ def plot_trajectories(self):

# Adjust spacing between subplots
plt.tight_layout()

+plt.show()
return fig, axs

def collect_environment_info(self, model_input, history, environments):
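With plt.show() now called inside plot_trajectories, callers still receive the figure and axes and can save or modify them afterwards. A minimal usage sketch (env is assumed to be a BatchEnvironment instance):

    # Sketch: display, then save, the batch trajectories figure
    fig, axs = env.plot_trajectories()  # shows the figure via plt.show()
    fig.savefig("trajectories.png", dpi=150)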
3 changes: 2 additions & 1 deletion neuralplayground/backend/training_loops.py
@@ -112,6 +112,7 @@ def tem_training_loop(
while agent.n_walk < params["n_rollout"]:
actions = agent.batch_act(obs)
obs, state, reward = env.step(actions, normalize_step=True)
+agent.update()

current_steps += params["n_rollout"]
finished_walks = current_steps >= max_steps_per_env
@@ -128,7 +129,7 @@
agent.logger.info(
"Iteration {:d}: new walk of length {:d} for batch entry {:d}".format(i, max_steps_per_env[env_i], env_i)
)
-agent.update()
+# env.plot_trajectories();
return agent, env, training_dict

