Commit

new training config
LukeHollingsworth committed Jul 30, 2024
1 parent 2c01eac commit 078d41f
Showing 4 changed files with 44 additions and 26 deletions.
40 changes: 20 additions & 20 deletions examples/agent_examples/whittington_2020_run.py
@@ -14,7 +14,7 @@
from neuralplayground.experiments import Sargolini2006Data

# Set the location for saving the results of the simulation
- simulation_id = "TEM_results_big_mem"
+ simulation_id = "TEM_results_all_5x5"
save_path = os.path.join(os.getcwd(), simulation_id)
# save_path = os.path.join(os.getcwd(), "examples", "agent_examples", "trained_results")
agent_class = Whittington2020
@@ -27,47 +27,47 @@
# Set the x and y limits for the arena
arena_x_limits = [
[-5, 5],
- [-4, 4],
[-5, 5],
- [-6, 6],
- [-4, 4],
[-5, 5],
- [-6, 6],
[-5, 5],
- [-4, 4],
[-5, 5],
- [-6, 6],
[-5, 5],
- [-4, 4],
[-5, 5],
- [-6, 6],
[-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
]
arena_y_limits = [
[-5, 5],
- [-4, 4],
[-5, 5],
- [-6, 6],
- [-4, 4],
[-5, 5],
- [-6, 6],
[-5, 5],
- [-4, 4],
[-5, 5],
- [-6, 6],
[-5, 5],
- [-4, 4],
[-5, 5],
- [-6, 6],
[-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
+ [-5, 5],
]
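For reference, after this change every one of the 16 batched arenas is 5 x 5 (hence the new simulation_id). A minimal sketch of an equivalent, more compact way to build the two lists, assuming the batch really is 16 environments as the entries above suggest:

n_envs = 16  # inferred from the number of list entries above; adjust if the batch differs
arena_x_limits = [[-5, 5] for _ in range(n_envs)]  # fresh inner lists, so entries can be edited independently
arena_y_limits = [[-5, 5] for _ in range(n_envs)]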

# Set parameters for the environment that generates observations
discrete_env_params = {
"environment_name": "DiscreteObject",
- "state_density": 1,
+ "state_density": 1 / 2,
"n_objects": params["n_x"],
- "agent_step_size": 1, # Note: this must be 1 / state density
+ "agent_step_size": 2, # Note: this must be 1 / state density
"use_behavioural_data": False,
"data_path": None,
"experiment_class": Sargolini2006Data,
@@ -106,7 +106,7 @@
}
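A quick aside on the discrete-environment block above: the in-line note ties the two changed values together. With state_density halved to 1/2, agent_step_size must double to 2 so that one step still moves the agent exactly one state. A small consistency check, as a sketch only (check_step_size is a hypothetical helper, not part of the repo):

def check_step_size(state_density, agent_step_size):
    # The config comment above requires agent_step_size == 1 / state_density
    expected = 1 / state_density
    assert agent_step_size == expected, f"agent_step_size should be {expected}, got {agent_step_size}"

check_step_size(state_density=1 / 2, agent_step_size=2)  # passes for the new config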

# Full model training consists of 20000 episodes
- training_loop_params = {"n_episode": 20000, "params": full_agent_params, "random_state": False, "custom_state": [0.0, 0.0]}
+ training_loop_params = {"n_episode": 10000, "params": full_agent_params, "random_state": True, "custom_state": [0.0, 0.0]}

# Create the training simulation object
sim = SingleSim(
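Two things change in the training-loop parameters above: the episode count is halved from 20000 to 10000 (so this run covers half of the full 20000-episode schedule mentioned in the comment), and random_state is switched to True, so each environment reset presumably draws a random start position instead of the fixed custom_state (see the env.reset call in training_loops.py further down). A rough sketch of the resulting step budget, with n_rollout and batch_size as assumed placeholder values, since they are set in the agent parameters not shown here:

n_episode = 10000   # from training_loop_params above
n_rollout = 20      # assumed placeholder; the real value lives in the agent parameters
batch_size = 16     # assumed placeholder; one walk per arena in the batch
steps_per_env = n_episode * n_rollout      # each episode advances every environment by one rollout
total_steps = steps_per_env * batch_size   # summed over the batched environments
print(steps_per_env, total_steps)          # 200000 3200000 under these assumptions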
6 changes: 3 additions & 3 deletions examples/agent_examples/whittington_slurm.sh
@@ -1,14 +1,14 @@
#!/bin/bash

- #SBATCH -J TEM_50G # job name
+ #SBATCH -J TEM_all_5x5 # job name
#SBATCH -p gpu # partition (queue)
#SBATCH -N 1 # number of nodes
#SBATCH --mem 50G # memory pool for all cores
#SBATCH -n 4 # number of cores
#SBATCH -t 0-72:00 # time (D-HH:MM)
#SBATCH --gres gpu:1 # request 1 GPU (of any kind)
- #SBATCH -o TEM_50G.%x.%N.%j.out # STDOUT
- #SBATCH -e TEM_50G.%x.%N.%j.err # STDERR
+ #SBATCH -o TEM_all_5x5.%x.%N.%j.out # STDOUT
+ #SBATCH -e TEM_all_5x5.%x.%N.%j.err # STDERR

source ~/.bashrc

19 changes: 19 additions & 0 deletions neuralplayground/agents/whittington_2020.py
@@ -256,6 +256,25 @@ def update(self):
self.final_model_input = model_input

forward = self.tem(model_input, self.prev_iter)
+ # if self.prev_iter is None:
+ # with open('OG_log.txt', 'a') as f:
+ # f.write('Walk number: ' + str(self.global_steps) + '\n')
+ # for c in model_input:
+ # f.write('ID: ' + str(c[0]) + '\n')
+ # f.write('Observation: ' + str([np.argmax(a) for a in c[1]]) + '\n')
+ # f.write('Action: ' + str(c[2]) + '\n')
+ # f.write('prev_iter: ' + str(self.prev_iter) + '\n')
+ # else:
+ # with open('OG_log.txt', 'a') as f:
+ # f.write('Walk number: ' + str(self.global_steps) + '\n')
+ # for c in model_input:
+ # f.write('ID: ' + str(c[0]) + '\n')
+ # f.write('Observation: ' + str([np.argmax(a) for a in c[1]]) + '\n')
+ # f.write('Action: ' + str(c[2]) + '\n')
+ # f.write('prev_iter.L: ' + str(self.prev_iter[0].L) + '\n')
+ # f.write('prev_iter.a: ' + str(self.prev_iter[0].a) + '\n')
+ # f.write('prev_iter.M: ' + str(self.prev_iter[0].M) + '\n')
+ # f.write('prev_iter.x: ' + str([torch.argmax(x) for x in self.prev_iter[0].x]) + '\n')

# Accumulate loss from forward pass
loss = torch.tensor(0.0)
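The block added above is commented-out debug logging that writes each rollout's model input (and, after the first step, the previous iteration's state) to OG_log.txt. If it ever needs to be switched on routinely, a version gated by the standard logging module would avoid editing the file each time; a rough sketch only, not part of the commit:

import logging

logger = logging.getLogger("whittington_2020.debug")

def log_model_input(global_steps, model_input, prev_iter=None):
    # Skip all the string formatting unless debug logging is actually enabled
    if not logger.isEnabledFor(logging.DEBUG):
        return
    logger.debug("Walk number: %s", global_steps)
    for c in model_input:
        logger.debug("ID: %s", c[0])
        logger.debug("Action: %s", c[2])
    if prev_iter is not None:
        logger.debug("prev_iter: %s", prev_iter[0])

Enabling it is then a single call, e.g. logging.basicConfig(level=logging.DEBUG), instead of uncommenting a 19-line block.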
5 changes: 2 additions & 3 deletions neuralplayground/backend/training_loops.py
@@ -108,22 +108,21 @@ def tem_training_loop(
current_steps = np.zeros(params["batch_size"], dtype=int)

obs, state = env.reset(random_state=random_state, custom_state=custom_state)

for i in range(n_episode):
while agent.n_walk < params["n_rollout"]:
actions = agent.batch_act(obs)
obs, state, reward = env.step(actions, normalize_step=True)
agent.update()

current_steps += params["n_rollout"]
finished_walks = current_steps >= max_steps_per_env

if any(finished_walks):
for env_i in np.where(finished_walks)[0]:
env.reset_env(env_i)
agent.prev_iter[0].a[env_i] = None

max_steps_per_env[env_i] = np.random.randint(4000, 6000)
current_steps[env_i] = 0
agent.update()
return agent, env, training_dict


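The loop above amounts to per-environment bookkeeping: every environment in the batch keeps walking until it exceeds its own randomly drawn walk length, is then reset (including clearing its stored previous action via agent.prev_iter[0].a[env_i] = None, presumably so the first step after a reset is not conditioned on a stale transition), and receives a fresh walk length between 4000 and 6000 steps. A stripped-down sketch of just that bookkeeping, with the agent and environment stubbed out and the batch size chosen arbitrarily:

import numpy as np

batch_size = 4                 # assumed placeholder batch size
n_rollout = 20                 # assumed placeholder rollout length
max_steps_per_env = np.random.randint(4000, 6000, size=batch_size)
current_steps = np.zeros(batch_size, dtype=int)

for episode in range(300):     # 300 * 20 = 6000 steps, enough for every walk to finish at least once
    current_steps += n_rollout                     # each episode advances every environment by one rollout
    finished_walks = current_steps >= max_steps_per_env
    for env_i in np.where(finished_walks)[0]:
        # reset this environment's walk and draw a new walk length, as in the loop above
        max_steps_per_env[env_i] = np.random.randint(4000, 6000)
        current_steps[env_i] = 0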
