Commit 1fe31ab: recent TEM updates

LukeHollingsworth committed Oct 7, 2024
1 parent 0ead088 · commit 1fe31ab
Showing 5 changed files with 40 additions and 28 deletions.
54 changes: 32 additions & 22 deletions examples/agent_examples/whittington_2020_example.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions examples/agent_examples/whittington_2020_run.py
@@ -14,7 +14,7 @@
 from neuralplayground.experiments import Sargolini2006Data

 # Set the location for saving the results of the simulation
-simulation_id = "TEM_test_5x5_small_walk"
+simulation_id = "TEM_test_with_break"
 save_path = os.path.join(os.getcwd(), simulation_id)
 # save_path = os.path.join(os.getcwd(), "examples", "agent_examples", "trained_results")
 agent_class = Whittington2020
@@ -106,7 +106,7 @@
 }

 # Full model training consists of 20000 episodes
-training_loop_params = {"n_episode": 20000, "params": full_agent_params, "random_state": False, "custom_state": [0.0, 0.0]}
+training_loop_params = {"n_episode": 3000, "params": full_agent_params, "random_state": False, "custom_state": [0.0, 0.0]}

 # Create the training simulation object
 sim = SingleSim(
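Note: the two edits above switch the run script from the full 20000-episode training to a shorter 3000-episode run under a new simulation_id. A minimal sketch of keeping both configurations side by side; the QUICK_TEST flag and make_training_params helper are illustrative, not part of the repository, while the dictionary keys and values come from the diff above.

# Hypothetical helper for toggling between a quick test run and full TEM training.
QUICK_TEST = True

def make_training_params(full_agent_params):
    n_episode = 3000 if QUICK_TEST else 20000  # full model training uses 20000 episodes
    return {
        "n_episode": n_episode,
        "params": full_agent_params,
        "random_state": False,
        "custom_state": [0.0, 0.0],
    }
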
7 changes: 4 additions & 3 deletions neuralplayground/agents/agent_core.py
@@ -79,9 +79,10 @@ def act(self, obs, policy_func=None):

         self.obs_history.append(obs)
         if len(self.obs_history) >= 1000:  # reset every 1000
-            self.obs_history = [
-                obs,
-            ]
+            # self.obs_history = [
+            #     obs,
+            # ]
+            self.obs_history.pop(0)
         if policy_func is not None:
             return policy_func(obs)

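Note: this hunk changes the observation history from a hard reset (keeping only the latest observation once 1000 are stored) to a sliding window that drops the oldest entry. A standalone sketch of the two behaviours, independent of the Agent class; only the names obs_history and the 1000-element threshold mirror the diff, the helper functions are illustrative.

obs_history = []

def record_reset(obs):
    # old behaviour: once the buffer reaches 1000, discard it and keep only the latest obs
    obs_history.append(obs)
    if len(obs_history) >= 1000:
        obs_history[:] = [obs]

def record_window(obs):
    # new behaviour: keep a rolling window of the most recent ~1000 observations
    obs_history.append(obs)
    if len(obs_history) >= 1000:
        obs_history.pop(0)

An equivalent, arguably more idiomatic option would be collections.deque(maxlen=1000), which drops the oldest item automatically on append.
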
1 change: 1 addition & 0 deletions neuralplayground/agents/whittington_2020.py
@@ -311,6 +311,7 @@ def update(self):
         # Compute model accuracies
         acc_p, acc_g, acc_gt = np.mean([[np.mean(a) for a in step.correct()] for step in forward], axis=0)
         acc_p, acc_g, acc_gt = [a * 100 for a in (acc_p, acc_g, acc_gt)]
+        self.accuracies = (acc_p + acc_g + acc_gt) / 3
         # Log progress
         if self.iter % 1 == 0:
             # Write series of messages to logger from this backprop iteration
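Note: the new self.accuracies attribute simply stores the mean of the three accuracy terms (acc_p, acc_g, acc_gt) computed just above it. A small, self-contained illustration of that aggregation; the forward/step.correct() structure is mocked here with a plain array.

import numpy as np

# Mocked stand-in for [[np.mean(a) for a in step.correct()] for step in forward]:
# one row per forward step, one column per accuracy term (p, g, gt).
per_step_accuracies = np.array([
    [0.80, 0.70, 0.75],
    [0.82, 0.72, 0.77],
])

acc_p, acc_g, acc_gt = np.mean(per_step_accuracies, axis=0)       # average over steps
acc_p, acc_g, acc_gt = [a * 100 for a in (acc_p, acc_g, acc_gt)]  # convert to percentages
accuracies = (acc_p + acc_g + acc_gt) / 3                         # single scalar, as in the diff
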
2 changes: 1 addition & 1 deletion neuralplayground/backend/training_loops.py
@@ -104,7 +104,7 @@ def tem_training_loop(

     training_dict = [agent.mod_kwargs, env.env_kwargs, agent.tem.hyper]

-    max_steps_per_env = np.random.randint(4000, 6000, size=params["batch_size"])
+    max_steps_per_env = np.random.randint(4000, 5000, size=params["batch_size"])
     current_steps = np.zeros(params["batch_size"], dtype=int)

     obs, state = env.reset(random_state=random_state, custom_state=custom_state)
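Note: max_steps_per_env draws, for each batch entry, a step budget for that environment; the commit only tightens the upper bound from 6000 to 5000. A hedged sketch of how such per-environment step caps are typically consumed in a loop; the step_counters helper and reset logic below are illustrative, not copied from tem_training_loop.

import numpy as np

batch_size = 16
max_steps_per_env = np.random.randint(4000, 5000, size=batch_size)  # per-environment walk length
current_steps = np.zeros(batch_size, dtype=int)

def step_counters(current_steps, max_steps_per_env):
    # Illustrative: advance all counters and flag which environments exceeded their cap.
    current_steps = current_steps + 1
    finished = current_steps >= max_steps_per_env
    # Environments that finished would be reset and given a fresh cap.
    current_steps[finished] = 0
    new_caps = np.where(finished, np.random.randint(4000, 5000, size=batch_size), max_steps_per_env)
    return current_steps, new_caps
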
