Commit

new training config
LukeHollingsworth committed Jul 30, 2024
1 parent 2c01eac commit 078d41f
Showing 4 changed files with 44 additions and 26 deletions.
40 changes: 20 additions & 20 deletions examples/agent_examples/whittington_2020_run.py
@@ -14,7 +14,7 @@
from neuralplayground.experiments import Sargolini2006Data

# Set the location for saving the results of the simulation
- simulation_id = "TEM_results_big_mem"
+ simulation_id = "TEM_results_all_5x5"
save_path = os.path.join(os.getcwd(), simulation_id)
# save_path = os.path.join(os.getcwd(), "examples", "agent_examples", "trained_results")
agent_class = Whittington2020
@@ -27,47 +27,47 @@
# Set the x and y limits for the arena
arena_x_limits = [
[-5, 5],
- [-4, 4],
[-5, 5],
- [-6, 6],
- [-4, 4],
[-5, 5],
- [-6, 6],
[-5, 5],
- [-4, 4],
[-5, 5],
- [-6, 6],
[-5, 5],
- [-4, 4],
[-5, 5],
- [-6, 6],
[-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
]
arena_y_limits = [
[-5, 5],
- [-4, 4],
[-5, 5],
- [-6, 6],
- [-4, 4],
[-5, 5],
- [-6, 6],
[-5, 5],
- [-4, 4],
[-5, 5],
- [-6, 6],
[-5, 5],
- [-4, 4],
[-5, 5],
- [-6, 6],
[-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
]
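For reference, after this change every one of the 16 batched arenas is 5 x 5 (hence the new simulation_id). A minimal sketch of an equivalent, more compact way to build the two lists, assuming the batch really is 16 environments as the entries above suggest:

n_envs = 16  # inferred from the number of list entries above; adjust if the batch differs
arena_x_limits = [[-5, 5] for _ in range(n_envs)]  # fresh inner lists, so entries can be edited independently
arena_y_limits = [[-5, 5] for _ in range(n_envs)]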

# Set parameters for the environment that generates observations
discrete_env_params = {
"environment_name": "DiscreteObject",
- "state_density": 1,
+ "state_density": 1 / 2,
"n_objects": params["n_x"],
- "agent_step_size": 1, # Note: this must be 1 / state density
+ "agent_step_size": 2, # Note: this must be 1 / state density
"use_behavioural_data": False,
"data_path": None,
"experiment_class": Sargolini2006Data,
@@ -106,7 +106,7 @@
}
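A quick aside on the discrete-environment block above: the in-line note ties the two changed values together. With state_density halved to 1/2, agent_step_size must double to 2 so that one step still moves the agent exactly one state. A small consistency check, as a sketch only (check_step_size is a hypothetical helper, not part of the repo):

def check_step_size(state_density, agent_step_size):
    # The config comment above requires agent_step_size == 1 / state_density
    expected = 1 / state_density
    assert agent_step_size == expected, f"agent_step_size should be {expected}, got {agent_step_size}"

check_step_size(state_density=1 / 2, agent_step_size=2)  # passes for the new config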

# Full model training consists of 20000 episodes
- training_loop_params = {"n_episode": 20000, "params": full_agent_params, "random_state": False, "custom_state": [0.0, 0.0]}
+ training_loop_params = {"n_episode": 10000, "params": full_agent_params, "random_state": True, "custom_state": [0.0, 0.0]}

# Create the training simulation object
sim = SingleSim(
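Two things change in the training-loop parameters above: the episode count is halved from 20000 to 10000 (so this run covers half of the full 20000-episode schedule mentioned in the comment), and random_state is switched to True, so each environment reset presumably draws a random start position instead of the fixed custom_state (see the env.reset call in training_loops.py further down). A rough sketch of the resulting step budget, with n_rollout and batch_size as assumed placeholder values, since they are set in the agent parameters not shown here:

n_episode = 10000   # from training_loop_params above
n_rollout = 20      # assumed placeholder; the real value lives in the agent parameters
batch_size = 16     # assumed placeholder; one walk per arena in the batch
steps_per_env = n_episode * n_rollout      # each episode advances every environment by one rollout
total_steps = steps_per_env * batch_size   # summed over the batched environments
print(steps_per_env, total_steps)          # 200000 3200000 under these assumptions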
6 changes: 3 additions & 3 deletions examples/agent_examples/whittington_slurm.sh
@@ -1,14 +1,14 @@
#!/bin/bash

- #SBATCH -J TEM_50G # job name
+ #SBATCH -J TEM_all_5x5 # job name
#SBATCH -p gpu # partition (queue)
#SBATCH -N 1 # number of nodes
#SBATCH --mem 50G # memory pool for all cores
#SBATCH -n 4 # number of cores
#SBATCH -t 0-72:00 # time (D-HH:MM)
#SBATCH --gres gpu:1 # request 1 GPU (of any kind)
- #SBATCH -o TEM_50G.%x.%N.%j.out # STDOUT
- #SBATCH -e TEM_50G.%x.%N.%j.err # STDERR
+ #SBATCH -o TEM_all_5x5.%x.%N.%j.out # STDOUT
+ #SBATCH -e TEM_all_5x5.%x.%N.%j.err # STDERR

source ~/.bashrc

19 changes: 19 additions & 0 deletions neuralplayground/agents/whittington_2020.py
@@ -256,6 +256,25 @@ def update(self):
self.final_model_input = model_input

forward = self.tem(model_input, self.prev_iter)
+ # if self.prev_iter is None:
+ # with open('OG_log.txt', 'a') as f:
+ # f.write('Walk number: ' + str(self.global_steps) + '\n')
+ # for c in model_input:
+ # f.write('ID: ' + str(c[0]) + '\n')
+ # f.write('Observation: ' + str([np.argmax(a) for a in c[1]]) + '\n')
+ # f.write('Action: ' + str(c[2]) + '\n')
+ # f.write('prev_iter: ' + str(self.prev_iter) + '\n')
+ # else:
+ # with open('OG_log.txt', 'a') as f:
+ # f.write('Walk number: ' + str(self.global_steps) + '\n')
+ # for c in model_input:
+ # f.write('ID: ' + str(c[0]) + '\n')
+ # f.write('Observation: ' + str([np.argmax(a) for a in c[1]]) + '\n')
+ # f.write('Action: ' + str(c[2]) + '\n')
+ # f.write('prev_iter.L: ' + str(self.prev_iter[0].L) + '\n')
+ # f.write('prev_iter.a: ' + str(self.prev_iter[0].a) + '\n')
+ # f.write('prev_iter.M: ' + str(self.prev_iter[0].M) + '\n')
+ # f.write('prev_iter.x: ' + str([torch.argmax(x) for x in self.prev_iter[0].x]) + '\n')

# Accumulate loss from forward pass
loss = torch.tensor(0.0)
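The block added above is commented-out debug logging that writes each rollout's model input (and, after the first step, the previous iteration's state) to OG_log.txt. If it ever needs to be switched on routinely, a version gated by the standard logging module would avoid editing the file each time; a rough sketch only, not part of the commit:

import logging

logger = logging.getLogger("whittington_2020.debug")

def log_model_input(global_steps, model_input, prev_iter=None):
    # Skip all the string formatting unless debug logging is actually enabled
    if not logger.isEnabledFor(logging.DEBUG):
        return
    logger.debug("Walk number: %s", global_steps)
    for c in model_input:
        logger.debug("ID: %s", c[0])
        logger.debug("Action: %s", c[2])
    if prev_iter is not None:
        logger.debug("prev_iter: %s", prev_iter[0])

Enabling it is then a single call, e.g. logging.basicConfig(level=logging.DEBUG), instead of uncommenting a 19-line block.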
5 changes: 2 additions & 3 deletions neuralplayground/backend/training_loops.py
@@ -108,22 +108,21 @@ def tem_training_loop(
current_steps = np.zeros(params["batch_size"], dtype=int)

obs, state = env.reset(random_state=random_state, custom_state=custom_state)

for i in range(n_episode):
while agent.n_walk < params["n_rollout"]:
actions = agent.batch_act(obs)
obs, state, reward = env.step(actions, normalize_step=True)
agent.update()

current_steps += params["n_rollout"]
finished_walks = current_steps >= max_steps_per_env

if any(finished_walks):
for env_i in np.where(finished_walks)[0]:
env.reset_env(env_i)
agent.prev_iter[0].a[env_i] = None

max_steps_per_env[env_i] = np.random.randint(4000, 6000)
current_steps[env_i] = 0
agent.update()
return agent, env, training_dict


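The loop above amounts to per-environment bookkeeping: every environment in the batch keeps walking until it exceeds its own randomly drawn walk length, is then reset (including clearing its stored previous action via agent.prev_iter[0].a[env_i] = None, presumably so the first step after a reset is not conditioned on a stale transition), and receives a fresh walk length between 4000 and 6000 steps. A stripped-down sketch of just that bookkeeping, with the agent and environment stubbed out and the batch size chosen arbitrarily:

import numpy as np

batch_size = 4                 # assumed placeholder batch size
n_rollout = 20                 # assumed placeholder rollout length
max_steps_per_env = np.random.randint(4000, 6000, size=batch_size)
current_steps = np.zeros(batch_size, dtype=int)

for episode in range(300):     # 300 * 20 = 6000 steps, enough for every walk to finish at least once
    current_steps += n_rollout                     # each episode advances every environment by one rollout
    finished_walks = current_steps >= max_steps_per_env
    for env_i in np.where(finished_walks)[0]:
        # reset this environment's walk and draw a new walk length, as in the loop above
        max_steps_per_env[env_i] = np.random.randint(4000, 6000)
        current_steps[env_i] = 0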
