From 4ce3b8b5d5c60106ce1cc63ae341ac15ad6fca7d Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 8 Jul 2024 17:31:22 +0100
Subject: [PATCH 01/21] Define mlflow experiment and run name with reference to
 the trained model

---
 crabs/detector/evaluate_model.py | 31 ++++++++++++++++++++++-------
 crabs/detector/utils/evaluate.py | 34 ++++++++++++++++++++++++++++----
 2 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/crabs/detector/evaluate_model.py b/crabs/detector/evaluate_model.py
index 37c3cf01..f3f554b4 100644
--- a/crabs/detector/evaluate_model.py
+++ b/crabs/detector/evaluate_model.py
@@ -11,7 +11,6 @@
 from crabs.detector.datamodules import CrabsDataModule
 from crabs.detector.models import FasterRCNN
 from crabs.detector.utils.detection import (
-    set_mlflow_run_name,
     setup_mlflow_logger,
     slurm_logs_as_artifacts,
 )
@@ -20,6 +19,9 @@
     get_cli_arg_from_ckpt,
     get_config_from_ckpt,
     get_img_directories_from_ckpt,
+    get_mlflow_experiment_name_from_ckpt,
+    get_mlflow_parameters_from_ckpt,
+    get_mlflow_run_name_from_ckpt,
 )
 from crabs.detector.utils.visualization import save_images_with_boxes
 
@@ -41,6 +43,9 @@ def __init__(self, args: argparse.Namespace) -> None:
 
         # trained model
         self.trained_model_path = args.trained_model_path
+        self.trained_model_run_name = get_mlflow_parameters_from_ckpt(
+            self.trained_model_path
+        )["run_name"]
 
         # config: retreieve from ckpt if not passed as CLI argument
         self.config_file = args.config_file
@@ -66,7 +71,9 @@ def __init__(self, args: argparse.Namespace) -> None:
         self.accelerator = args.accelerator
 
         # MLflow
-        self.experiment_name = args.experiment_name
+        self.experiment_name = get_mlflow_experiment_name_from_ckpt(
+            args=self.args, trained_model_path=self.trained_model_path
+        )
         self.mlflow_folder = args.mlflow_folder
 
         # Debugging
@@ -81,12 +88,14 @@ def __init__(self, args: argparse.Namespace) -> None:
     def setup_trainer(self):
         """Set up trainer object with logging for testing."""
         # Assign run name
-        self.run_name = set_mlflow_run_name()
+        self.run_name = get_mlflow_run_name_from_ckpt(
+            self.args.mlflow_run_name_auto, self.trained_model_run_name
+        )
 
         # Setup logger
         mlf_logger = setup_mlflow_logger(
-            experiment_name=self.experiment_name,
-            run_name=self.run_name,
+            experiment_name=self.experiment_name,  # get from ckpt?
+            run_name=self.run_name,  # -------------->get from ckpt?
             mlflow_folder=self.mlflow_folder,
             cli_args=self.args,
         )
@@ -107,7 +116,9 @@ def evaluate_model(self) -> None:
             list_annotation_files=self.annotation_files,
             split_seed=self.seed_n,
             config=self.config,
+            no_data_augmentation=True,
         )
+        # breakpoint()
 
         # Get trained model
         trained_model = FasterRCNN.load_from_checkpoint(
@@ -220,13 +231,12 @@ def evaluate_parse_args(args):
     parser.add_argument(
         "--experiment_name",
         type=str,
-        default="Sept2023_evaluation",
         help=(
             "Name of the experiment in MLflow, under which the current run "
             "will be logged. "
             "For example, the name of the dataset could be used, to group "
             "runs using the same data. "
-            "Default: Sept2023_evaluation"
+            "By default: <experiment_training_job>_evaluation."
         ),
     )
     parser.add_argument(
@@ -250,6 +260,13 @@ def evaluate_parse_args(args):
         default="./ml-runs",
         help=("Path to MLflow directory. Default: ./ml-runs"),
     )
+    parser.add_argument(
+        "--mlflow_run_name_auto",
+        action="store_true",
+        help=(
+            "Set the evaluation run name automatically from MLflow, ignoring the training job run name."
+        ),
+    )
     parser.add_argument(
         "--save_frames",
         action="store_true",
diff --git a/crabs/detector/utils/evaluate.py b/crabs/detector/utils/evaluate.py
index e2633291..b24a113a 100644
--- a/crabs/detector/utils/evaluate.py
+++ b/crabs/detector/utils/evaluate.py
@@ -2,7 +2,6 @@
 
 import argparse
 import ast
-import logging
 import sys
 from pathlib import Path
 
@@ -12,10 +11,9 @@
 from crabs.detector.utils.detection import (
     prep_annotation_files,
     prep_img_directories,
+    set_mlflow_run_name,
 )
 
-logging.basicConfig(level=logging.INFO)
-
 
 def compute_precision_recall(class_stats: dict) -> tuple[float, float, dict]:
     """Compute precision and recall.
@@ -143,7 +141,8 @@ def get_mlflow_parameters_from_ckpt(trained_model_path: str) -> dict:
     # get parameters of the run
     run = mlrun_client.get_run(ckpt_runID)
     params = run.data.params
-
+    params["run_name"] = run.info.run_name
+    
     return params
 
 
@@ -242,3 +241,30 @@ def get_annotation_files_from_ckpt(
         input_annotation_files, dataset_dirs
     )
     return annotation_files
+
+def get_mlflow_experiment_name_from_ckpt(
+    args: argparse.Namespace, trained_model_path: str
+) -> str:
+    """Define experiment name for MLflow from the training job one if not passed as CLI."""
+
+    if getattr(args, "experiment_name"):
+        experiment_name = getattr(args, "experiment_name")
+    else:
+        params = get_mlflow_parameters_from_ckpt(trained_model_path)
+        trained_model_expt_name = params["cli_args/experiment_name"]
+        experiment_name = trained_model_expt_name + "_evaluation"
+
+    return experiment_name
+
+
+def get_mlflow_run_name_from_ckpt(
+    mlflow_run_name_auto: bool, trained_model_run_name: str
+) -> str:  # ---- should be unique
+    """Define run name for eval job from the trained model job"""
+
+    if mlflow_run_name_auto:
+        run_name = set_mlflow_run_name()
+    else:
+        run_name = trained_model_run_name + "_eval_" + set_mlflow_run_name()
+
+    return run_name

From 4a7212564f9159744c2ec60b42fd93aa6305457f Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 8 Jul 2024 18:28:24 +0100
Subject: [PATCH 02/21] First draft bash script

---
 bash_scripts/run_evaluation_array.sh | 116 +++++++++++++++++++++++++++
 crabs/detector/evaluate_model.py     |   3 +-
 2 files changed, 118 insertions(+), 1 deletion(-)
 create mode 100644 bash_scripts/run_evaluation_array.sh

diff --git a/bash_scripts/run_evaluation_array.sh b/bash_scripts/run_evaluation_array.sh
new file mode 100644
index 00000000..aed3f2b8
--- /dev/null
+++ b/bash_scripts/run_evaluation_array.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+#SBATCH -p gpu # a100 # partition
+#SBATCH --gres=gpu:1 # gpu:a100_2g.10gb  # For any GPU: --gres=gpu:1. For a specific one: --gres=gpu:rtx5000
+#SBATCH -N 1   # number of nodes
+#SBATCH --ntasks-per-node 8 # 2 # max number of tasks per node
+#SBATCH --mem 32G # memory pool for all cores
+#SBATCH -t 3-00:00 # time (D-HH:MM)
+#SBATCH -o slurm_array.%A-%a.%N.out
+#SBATCH -e slurm_array.%A-%a.%N.err
+#SBATCH --mail-type=ALL
+#SBATCH --mail-user=s.minano@ucl.ac.uk
+#SBATCH --array=0-2%3
+
+
+# NOTE on SBATCH command for array jobs
+# with "SBATCH --array=0-n%m" ---> runs n separate jobs, but not more than m at a time.
+# the number of array jobs should match the number of input files
+
+# ---------------------
+# Source bashrc
+# ----------------------
+# Otherwise `which python` points to the miniconda module's Python
+source ~/.bashrc
+
+
+# memory
+# see https://pytorch.org/docs/stable/notes/cuda.html#environment-variables
+PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+
+# -----------------------------
+# Error settings for bash
+# -----------------------------
+# see https://wizardzines.com/comics/bash-errors/
+set -e  # do not continue after errors
+set -u  # throw error if variable is unset
+set -o pipefail  # make the pipe fail if any part of it fails
+
+# ---------------------
+# Define variables
+# ----------------------
+
+# List of models to evaluate
+MLFLOW_CKPTS_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs/317777717624044570/fe9a6c2f491a4496aade5034c75316cc/checkpoints
+LIST_CKPT_FILES=("$MLFLOW_CKPTS_FOLDER"/*.ckpt)
+
+# selected model
+CKPT_PATH=${LIST_CKPT_FILES[${SLURM_ARRAY_TASK_ID}]}
+
+# destination mlflow folder
+# EXPERIMENT_NAME="Sept2023" ----> get from training job
+MLFLOW_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs
+
+# version of the codebase
+GIT_BRANCH=main
+
+# --------------------
+# Check inputs
+# --------------------
+# Check len(list of input data) matches max SLURM_ARRAY_TASK_COUNT
+# if not, exit
+if [[ $SLURM_ARRAY_TASK_COUNT -ne ${#LIST_CKPT_FILES[@]} ]]; then
+    echo "The number of array tasks does not match the number of .ckpt files"
+    exit 1
+fi
+
+# -----------------------------
+# Create virtual environment
+# -----------------------------
+module load miniconda
+
+# Define a environment for each job in the
+# temporary directory of the compute node
+ENV_NAME=crabs-dev-$SPLIT_SEED-$SLURM_ARRAY_JOB_ID
+ENV_PREFIX=$TMPDIR/$ENV_NAME
+
+# create environment
+conda create \
+    --prefix $ENV_PREFIX \
+    -y \
+    python=3.10
+
+# activate environment
+conda activate $ENV_PREFIX
+
+# install crabs package in virtual env
+python -m pip install git+https://github.com/SainsburyWellcomeCentre/crabs-exploration.git@$GIT_BRANCH
+
+
+# log pip and python locations
+echo $ENV_PREFIX
+which python
+which pip
+
+# print the version of crabs package (last number is the commit hash)
+echo "Git branch: $GIT_BRANCH"
+conda list crabs
+echo "-----"
+
+# ------------------------------------
+# GPU specs
+# ------------------------------------
+echo "Memory used per GPU before training"
+echo $(nvidia-smi --query-gpu=name,memory.total,memory.free,memory.used --format=csv) #noheader
+echo "-----"
+
+
+# -------------------
+# Run training script
+# -------------------
+echo "Evaluating trained model at $CKPT_PATH: "
+evaluate-detector  \
+ --trained_model_path $CKPT_PATH \
+ --accelerator gpu \
+ --mlflow_folder $MLFLOW_FOLDER \
+echo "-----"
diff --git a/crabs/detector/evaluate_model.py b/crabs/detector/evaluate_model.py
index f3f554b4..c8cb032b 100644
--- a/crabs/detector/evaluate_model.py
+++ b/crabs/detector/evaluate_model.py
@@ -25,6 +25,8 @@
 )
 from crabs.detector.utils.visualization import save_images_with_boxes
 
+logging.getLogger().setLevel(logging.INFO)
+
 
 class DetectorEvaluate:
     """Interface for evaluating an object detector.
@@ -118,7 +120,6 @@ def evaluate_model(self) -> None:
             config=self.config,
             no_data_augmentation=True,
         )
-        # breakpoint()
 
         # Get trained model
         trained_model = FasterRCNN.load_from_checkpoint(

From 6b686417fbc9837e5a90cb92075d71955ff3e7ac Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 8 Jul 2024 18:41:20 +0100
Subject: [PATCH 03/21] add script to select best model

---
 bash_scripts/select_best_model.sh | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 bash_scripts/select_best_model.sh

diff --git a/bash_scripts/select_best_model.sh b/bash_scripts/select_best_model.sh
new file mode 100644
index 00000000..e69de29b

From e110eb5ff79cc8c7e9e0f25fcd1f32d143353d72 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 8 Jul 2024 18:52:06 +0100
Subject: [PATCH 04/21] Add checkpoint path to evaluation run name

---
 crabs/detector/evaluate_model.py |  4 +++-
 crabs/detector/utils/evaluate.py | 12 ++++++++++--
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/crabs/detector/evaluate_model.py b/crabs/detector/evaluate_model.py
index c8cb032b..32d2821b 100644
--- a/crabs/detector/evaluate_model.py
+++ b/crabs/detector/evaluate_model.py
@@ -91,7 +91,9 @@ def setup_trainer(self):
         """Set up trainer object with logging for testing."""
         # Assign run name
         self.run_name = get_mlflow_run_name_from_ckpt(
-            self.args.mlflow_run_name_auto, self.trained_model_run_name
+            mlflow_run_name_auto=self.args.mlflow_run_name_auto,
+            trained_model_run_name=self.trained_model_run_name,
+            trained_model_path=self.trained_model_path,
         )
 
         # Setup logger
diff --git a/crabs/detector/utils/evaluate.py b/crabs/detector/utils/evaluate.py
index b24a113a..2937f05b 100644
--- a/crabs/detector/utils/evaluate.py
+++ b/crabs/detector/utils/evaluate.py
@@ -258,13 +258,21 @@ def get_mlflow_experiment_name_from_ckpt(
 
 
 def get_mlflow_run_name_from_ckpt(
-    mlflow_run_name_auto: bool, trained_model_run_name: str
+    mlflow_run_name_auto: bool,
+    trained_model_run_name: str,
+    trained_model_path: str,
 ) -> str:  # ---- should be unique
     """Define run name for eval job from the trained model job"""
 
     if mlflow_run_name_auto:
         run_name = set_mlflow_run_name()
     else:
-        run_name = trained_model_run_name + "_eval_" + set_mlflow_run_name()
+        run_name = (
+            trained_model_run_name
+            + "_"
+            + Path(trained_model_path).stem
+            + "_eval"
+            + set_mlflow_run_name().replace("run", "")
+        )
 
     return run_name

From b35f727ab35ef5299acdce3bb937e9beb09c5b13 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Tue, 29 Oct 2024 17:39:39 +0000
Subject: [PATCH 05/21] Fix ruff

---
 crabs/detector/evaluate_model.py |  3 ++-
 crabs/detector/utils/evaluate.py | 14 ++++++++------
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/crabs/detector/evaluate_model.py b/crabs/detector/evaluate_model.py
index 32d2821b..0721ffa2 100644
--- a/crabs/detector/evaluate_model.py
+++ b/crabs/detector/evaluate_model.py
@@ -267,7 +267,8 @@ def evaluate_parse_args(args):
         "--mlflow_run_name_auto",
         action="store_true",
         help=(
-            "Set the evaluation run name automatically from MLflow, ignoring the training job run name."
+            "Set the evaluation run name automatically from MLflow, "
+            "ignoring the training job run name."
         ),
     )
     parser.add_argument(
diff --git a/crabs/detector/utils/evaluate.py b/crabs/detector/utils/evaluate.py
index 2937f05b..34676396 100644
--- a/crabs/detector/utils/evaluate.py
+++ b/crabs/detector/utils/evaluate.py
@@ -142,7 +142,7 @@ def get_mlflow_parameters_from_ckpt(trained_model_path: str) -> dict:
     run = mlrun_client.get_run(ckpt_runID)
     params = run.data.params
     params["run_name"] = run.info.run_name
-    
+
     return params
 
 
@@ -242,13 +242,16 @@ def get_annotation_files_from_ckpt(
     )
     return annotation_files
 
+
 def get_mlflow_experiment_name_from_ckpt(
     args: argparse.Namespace, trained_model_path: str
 ) -> str:
-    """Define experiment name for MLflow from the training job one if not passed as CLI."""
+    """Define MLflow experiment name from the training job.
 
-    if getattr(args, "experiment_name"):
-        experiment_name = getattr(args, "experiment_name")
+    Only used if the experiment name is not passed via CLI.
+    """
+    if args.experiment_name:
+        experiment_name = args.experiment_name
     else:
         params = get_mlflow_parameters_from_ckpt(trained_model_path)
         trained_model_expt_name = params["cli_args/experiment_name"]
@@ -262,8 +265,7 @@ def get_mlflow_run_name_from_ckpt(
     trained_model_run_name: str,
     trained_model_path: str,
 ) -> str:  # ---- should be unique
-    """Define run name for eval job from the trained model job"""
-
+    """Define run name for eval job from the trained model job."""
     if mlflow_run_name_auto:
         run_name = set_mlflow_run_name()
     else:

From e6cfb6a4dc78b9495d2d6874e932c0e7eb6c1eab Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Tue, 29 Oct 2024 17:40:18 +0000
Subject: [PATCH 06/21] Remove select best model empty script

---
 bash_scripts/select_best_model.sh | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 bash_scripts/select_best_model.sh

diff --git a/bash_scripts/select_best_model.sh b/bash_scripts/select_best_model.sh
deleted file mode 100644
index e69de29b..00000000

From c4b4b3339169f5f06a7fed4bc072c3a86d65e1bf Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Tue, 29 Oct 2024 18:37:40 +0000
Subject: [PATCH 07/21] Log dataset info and trained model info to mlflow

---
 crabs/detector/evaluate_model.py | 34 ++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/crabs/detector/evaluate_model.py b/crabs/detector/evaluate_model.py
index 0721ffa2..d62afbc2 100644
--- a/crabs/detector/evaluate_model.py
+++ b/crabs/detector/evaluate_model.py
@@ -43,13 +43,17 @@ def __init__(self, args: argparse.Namespace) -> None:
         # CLI inputs
         self.args = args
 
-        # trained model
+        # trained model data
         self.trained_model_path = args.trained_model_path
-        self.trained_model_run_name = get_mlflow_parameters_from_ckpt(
+        trained_model_params = get_mlflow_parameters_from_ckpt(
             self.trained_model_path
-        )["run_name"]
+        )
+        self.trained_model_run_name = trained_model_params["run_name"]
+        self.trained_model_experiment_name = trained_model_params[
+            "cli_args/experiment_name"
+        ]
 
-        # config: retreieve from ckpt if not passed as CLI argument
+        # config: retrieve from ckpt if not passed as CLI argument
         self.config_file = args.config_file
         self.config = get_config_from_ckpt(
             config_file=self.config_file,
@@ -90,6 +94,7 @@ def __init__(self, args: argparse.Namespace) -> None:
     def setup_trainer(self):
         """Set up trainer object with logging for testing."""
         # Assign run name
+        # TODO: mlflow_run_name_auto should be default I think
         self.run_name = get_mlflow_run_name_from_ckpt(
             mlflow_run_name_auto=self.args.mlflow_run_name_auto,
             trained_model_run_name=self.trained_model_run_name,
@@ -98,12 +103,29 @@ def setup_trainer(self):
 
         # Setup logger
         mlf_logger = setup_mlflow_logger(
-            experiment_name=self.experiment_name,  # get from ckpt?
-            run_name=self.run_name,  # -------------->get from ckpt?
+            experiment_name=self.experiment_name,
+            run_name=self.run_name,
             mlflow_folder=self.mlflow_folder,
             cli_args=self.args,
         )
 
+        # Add trained model section
+        mlf_logger.log_hyperparams(
+            {
+                "trained_model/run_name": self.trained_model_run_name,
+                "trained_model/experiment_name": self.trained_model_experiment_name,
+            }
+        )
+
+        # Add dataset section
+        mlf_logger.log_hyperparams(
+            {
+                "dataset/images_dir": self.images_dirs,
+                "dataset/annotation_files": self.annotation_files,
+                "dataset/seed": self.seed_n,
+            }
+        )
+
         # Return trainer linked to logger
         return lightning.Trainer(
             accelerator=self.accelerator,

From c91c65e7cede690650f7103477b5affabb56a63d Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Tue, 29 Oct 2024 18:42:29 +0000
Subject: [PATCH 08/21] Print MLflow details to screen

---
 crabs/detector/evaluate_model.py | 49 +++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 19 deletions(-)

diff --git a/crabs/detector/evaluate_model.py b/crabs/detector/evaluate_model.py
index d62afbc2..e230c1bc 100644
--- a/crabs/detector/evaluate_model.py
+++ b/crabs/detector/evaluate_model.py
@@ -4,6 +4,7 @@
 import logging
 import os
 import sys
+from pathlib import Path
 
 import lightning
 import torch
@@ -49,7 +50,7 @@ def __init__(self, args: argparse.Namespace) -> None:
             self.trained_model_path
         )
         self.trained_model_run_name = trained_model_params["run_name"]
-        self.trained_model_experiment_name = trained_model_params[
+        self.trained_model_expt_name = trained_model_params[
             "cli_args/experiment_name"
         ]
 
@@ -90,6 +91,7 @@ def __init__(self, args: argparse.Namespace) -> None:
         logging.info(f"Images directories: {self.images_dirs}")
         logging.info(f"Annotation files: {self.annotation_files}")
         logging.info(f"Seed: {self.seed_n}")
+        logging.info("---------------------------------")
 
     def setup_trainer(self):
         """Set up trainer object with logging for testing."""
@@ -100,6 +102,12 @@ def setup_trainer(self):
             trained_model_run_name=self.trained_model_run_name,
             trained_model_path=self.trained_model_path,
         )
+        # TODO: add these logs to training too
+        logging.info("MLflow logs for current job")
+        logging.info(f"Experiment name: {self.experiment_name}")
+        logging.info(f"Run name: {self.run_name}")
+        logging.info(f"Folder: {Path(self.mlflow_folder).resolve()}")
+        logging.info("---------------------------------")
 
         # Setup logger
         mlf_logger = setup_mlflow_logger(
@@ -113,7 +121,7 @@ def setup_trainer(self):
         mlf_logger.log_hyperparams(
             {
                 "trained_model/run_name": self.trained_model_run_name,
-                "trained_model/experiment_name": self.trained_model_experiment_name,
+                "trained_model/experiment_name": self.trained_model_expt_name,
             }
         )
 
@@ -261,29 +269,17 @@ def evaluate_parse_args(args):
             "will be logged. "
             "For example, the name of the dataset could be used, to group "
             "runs using the same data. "
-            "By default: <experiment_training_job>_evaluation."
-        ),
-    )
-    parser.add_argument(
-        "--fast_dev_run",
-        action="store_true",
-        help="Debugging option to run training for one batch and one epoch",
-    )
-    parser.add_argument(
-        "--limit_test_batches",
-        type=float,
-        default=1.0,
-        help=(
-            "Debugging option to run training on a fraction of "
-            "the training set."
-            "Default: 1.0 (all the training set)"
+            "By default: <trained_model_mlflow_experiment_name>_evaluation."
         ),
     )
     parser.add_argument(
         "--mlflow_folder",
         type=str,
         default="./ml-runs",
-        help=("Path to MLflow directory. Default: ./ml-runs"),
+        help=(
+            "Path to MLflow directory where to log the evaluation data. "
+            "Default: ./ml-runs"
+        ),
     )
     parser.add_argument(
         "--mlflow_run_name_auto",
@@ -318,6 +314,21 @@ def evaluate_parse_args(args):
             "under the current working directory."
         ),
     )
+    parser.add_argument(
+        "--fast_dev_run",
+        action="store_true",
+        help="Debugging option to run training for one batch and one epoch",
+    )
+    parser.add_argument(
+        "--limit_test_batches",
+        type=float,
+        default=1.0,
+        help=(
+            "Debugging option to run training on a fraction of "
+            "the training set."
+            "Default: 1.0 (all the training set)"
+        ),
+    )
     return parser.parse_args(args)
 
 

From 43f2fe99bd38e681b17340686c8861b585acd5c0 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Tue, 29 Oct 2024 18:43:24 +0000
Subject: [PATCH 09/21] Small edits to comments

---
 crabs/detector/evaluate_model.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/crabs/detector/evaluate_model.py b/crabs/detector/evaluate_model.py
index e230c1bc..bc7b7ebd 100644
--- a/crabs/detector/evaluate_model.py
+++ b/crabs/detector/evaluate_model.py
@@ -102,7 +102,7 @@ def setup_trainer(self):
             trained_model_run_name=self.trained_model_run_name,
             trained_model_path=self.trained_model_path,
         )
-        # TODO: add these logs to training too
+        # TODO: add these logs to training job too
         logging.info("MLflow logs for current job")
         logging.info(f"Experiment name: {self.experiment_name}")
         logging.info(f"Run name: {self.run_name}")
@@ -159,13 +159,18 @@ def evaluate_model(self) -> None:
         )
 
         # Run testing
+        # TODO: Optionally on validation set?
+        # trainer.validate(
+        # trained_model,
+        # data_module,
+        # )
         trainer = self.setup_trainer()
         trainer.test(
             trained_model,
             data_module,
         )
 
-        # Save images if required
+        # Save images with bounding boxes if required
         if self.args.save_frames:
             save_images_with_boxes(
                 test_dataloader=data_module.test_dataloader(),

From f2555326f8b94a25e7a324238261ef802b751c2b Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 30 Oct 2024 11:06:08 +0000
Subject: [PATCH 10/21] Rename output folder for evaluationr results

---
 crabs/detector/evaluate_model.py      | 6 ++++--
 crabs/detector/utils/visualization.py | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/crabs/detector/evaluate_model.py b/crabs/detector/evaluate_model.py
index bc7b7ebd..01c537e8 100644
--- a/crabs/detector/evaluate_model.py
+++ b/crabs/detector/evaluate_model.py
@@ -313,9 +313,11 @@ def evaluate_parse_args(args):
         type=str,
         default="",
         help=(
-            "Output directory for the exported frames. "
+            "Output directory for the evaluated frames, with bounding boxes. "
+            "Predicted boxes are plotted in red, and ground-truth boxes in "
+            "green. "
             "By default, the frames are saved in a "
-            "`results_<timestamp> folder "
+            "`evaluation_output_<timestamp> folder "
             "under the current working directory."
         ),
     )
diff --git a/crabs/detector/utils/visualization.py b/crabs/detector/utils/visualization.py
index 784eb76a..e52c9ffc 100644
--- a/crabs/detector/utils/visualization.py
+++ b/crabs/detector/utils/visualization.py
@@ -188,7 +188,7 @@ def save_images_with_boxes(
 
     if not output_dir:
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        output_dir = f"results_{timestamp}"
+        output_dir = f"evaluation_output_{timestamp}"
     os.makedirs(output_dir, exist_ok=True)
 
     with torch.no_grad():

From 6bedbabf17eab9d9df96499cf45d439de558f50a Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 31 Oct 2024 12:13:23 +0000
Subject: [PATCH 11/21] Move run_name assignment to constructor and remove
 option of defining it based on training job run name

---
 crabs/detector/evaluate_model.py | 35 ++++++++++----------------------
 1 file changed, 11 insertions(+), 24 deletions(-)

diff --git a/crabs/detector/evaluate_model.py b/crabs/detector/evaluate_model.py
index 01c537e8..3ae4e4cc 100644
--- a/crabs/detector/evaluate_model.py
+++ b/crabs/detector/evaluate_model.py
@@ -12,6 +12,7 @@
 from crabs.detector.datamodules import CrabsDataModule
 from crabs.detector.models import FasterRCNN
 from crabs.detector.utils.detection import (
+    set_mlflow_run_name,
     setup_mlflow_logger,
     slurm_logs_as_artifacts,
 )
@@ -22,7 +23,6 @@
     get_img_directories_from_ckpt,
     get_mlflow_experiment_name_from_ckpt,
     get_mlflow_parameters_from_ckpt,
-    get_mlflow_run_name_from_ckpt,
 )
 from crabs.detector.utils.visualization import save_images_with_boxes
 
@@ -77,38 +77,33 @@ def __init__(self, args: argparse.Namespace) -> None:
         # Hardware
         self.accelerator = args.accelerator
 
-        # MLflow
+        # MLflow experiment name and run name
         self.experiment_name = get_mlflow_experiment_name_from_ckpt(
             args=self.args, trained_model_path=self.trained_model_path
         )
+        self.run_name = set_mlflow_run_name()
         self.mlflow_folder = args.mlflow_folder
 
-        # Debugging
+        # Debugging settings
         self.fast_dev_run = args.fast_dev_run
         self.limit_test_batches = args.limit_test_batches
 
+        # Log dataset information to screen
         logging.info("Dataset")
         logging.info(f"Images directories: {self.images_dirs}")
         logging.info(f"Annotation files: {self.annotation_files}")
         logging.info(f"Seed: {self.seed_n}")
         logging.info("---------------------------------")
 
-    def setup_trainer(self):
-        """Set up trainer object with logging for testing."""
-        # Assign run name
-        # TODO: mlflow_run_name_auto should be default I think
-        self.run_name = get_mlflow_run_name_from_ckpt(
-            mlflow_run_name_auto=self.args.mlflow_run_name_auto,
-            trained_model_run_name=self.trained_model_run_name,
-            trained_model_path=self.trained_model_path,
-        )
-        # TODO: add these logs to training job too
+        # Log MLflow information to screen
         logging.info("MLflow logs for current job")
         logging.info(f"Experiment name: {self.experiment_name}")
         logging.info(f"Run name: {self.run_name}")
         logging.info(f"Folder: {Path(self.mlflow_folder).resolve()}")
         logging.info("---------------------------------")
 
+    def setup_trainer(self):
+        """Set up trainer object with logging for testing."""
         # Setup logger
         mlf_logger = setup_mlflow_logger(
             experiment_name=self.experiment_name,
@@ -117,15 +112,15 @@ def setup_trainer(self):
             cli_args=self.args,
         )
 
-        # Add trained model section
+        # Add trained model section to MLflow hyperparameters
         mlf_logger.log_hyperparams(
             {
-                "trained_model/run_name": self.trained_model_run_name,
                 "trained_model/experiment_name": self.trained_model_expt_name,
+                "trained_model/run_name": self.trained_model_run_name,
             }
         )
 
-        # Add dataset section
+        # Add dataset section to MLflow hyperparameters
         mlf_logger.log_hyperparams(
             {
                 "dataset/images_dir": self.images_dirs,
@@ -286,14 +281,6 @@ def evaluate_parse_args(args):
             "Default: ./ml-runs"
         ),
     )
-    parser.add_argument(
-        "--mlflow_run_name_auto",
-        action="store_true",
-        help=(
-            "Set the evaluation run name automatically from MLflow, "
-            "ignoring the training job run name."
-        ),
-    )
     parser.add_argument(
         "--save_frames",
         action="store_true",

From 0f7117f677c58f27246b0d6e4db15d208f29776a Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 31 Oct 2024 12:13:42 +0000
Subject: [PATCH 12/21] Add name of checkpoint file to MLflow logs

---
 crabs/detector/evaluate_model.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crabs/detector/evaluate_model.py b/crabs/detector/evaluate_model.py
index 3ae4e4cc..68718ffd 100644
--- a/crabs/detector/evaluate_model.py
+++ b/crabs/detector/evaluate_model.py
@@ -117,6 +117,7 @@ def setup_trainer(self):
             {
                 "trained_model/experiment_name": self.trained_model_expt_name,
                 "trained_model/run_name": self.trained_model_run_name,
+                "trained_model/ckpt_file": Path(self.trained_model_path).name,
             }
         )
 

From 9bd67639a4e78edf473955ffb174d2b234657457 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 31 Oct 2024 12:14:33 +0000
Subject: [PATCH 13/21] Remove option to define run name from train job run
 name from evaluate utils

---
 crabs/detector/utils/evaluate.py | 23 +----------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

diff --git a/crabs/detector/utils/evaluate.py b/crabs/detector/utils/evaluate.py
index 34676396..3b0d25cc 100644
--- a/crabs/detector/utils/evaluate.py
+++ b/crabs/detector/utils/evaluate.py
@@ -11,7 +11,6 @@
 from crabs.detector.utils.detection import (
     prep_annotation_files,
     prep_img_directories,
-    set_mlflow_run_name,
 )
 
 
@@ -191,7 +190,7 @@ def get_config_from_ckpt(config_file: str, trained_model_path: str) -> dict:
 def get_cli_arg_from_ckpt(
     args: argparse.Namespace, cli_arg_str: str, trained_model_path: str
 ):
-    """Get CLI argument from checkpoint if not in args."""
+    """Get CLI argument from checkpoint if not passed as CLI argument."""
     if getattr(args, cli_arg_str):
         cli_arg = getattr(args, cli_arg_str)
     else:
@@ -258,23 +257,3 @@ def get_mlflow_experiment_name_from_ckpt(
         experiment_name = trained_model_expt_name + "_evaluation"
 
     return experiment_name
-
-
-def get_mlflow_run_name_from_ckpt(
-    mlflow_run_name_auto: bool,
-    trained_model_run_name: str,
-    trained_model_path: str,
-) -> str:  # ---- should be unique
-    """Define run name for eval job from the trained model job."""
-    if mlflow_run_name_auto:
-        run_name = set_mlflow_run_name()
-    else:
-        run_name = (
-            trained_model_run_name
-            + "_"
-            + Path(trained_model_path).stem
-            + "_eval"
-            + set_mlflow_run_name().replace("run", "")
-        )
-
-    return run_name

From 3744021281e649cf9fec91014b5fe9d56176196f Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 31 Oct 2024 12:44:23 +0000
Subject: [PATCH 14/21] Adapt test to generalise to other output directory
 names (still not fixed for batch size > 1, see PR 232

---
 crabs/detector/evaluate_model.py      |  6 ++++--
 crabs/detector/utils/visualization.py | 17 +++++++++--------
 tests/test_unit/test_visualization.py | 13 +++++++++----
 3 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/crabs/detector/evaluate_model.py b/crabs/detector/evaluate_model.py
index 68718ffd..519644c3 100644
--- a/crabs/detector/evaluate_model.py
+++ b/crabs/detector/evaluate_model.py
@@ -169,9 +169,11 @@ def evaluate_model(self) -> None:
         # Save images with bounding boxes if required
         if self.args.save_frames:
             save_images_with_boxes(
-                test_dataloader=data_module.test_dataloader(),
+                dataloader=data_module.test_dataloader(),
                 trained_model=trained_model,
-                output_dir=self.args.frames_output_dir,
+                output_dir=str(
+                    Path(self.args.frames_output_dir) / "evaluate_output"
+                ),
                 score_threshold=self.args.frames_score_threshold,
             )
 
diff --git a/crabs/detector/utils/visualization.py b/crabs/detector/utils/visualization.py
index e52c9ffc..fc10deab 100644
--- a/crabs/detector/utils/visualization.py
+++ b/crabs/detector/utils/visualization.py
@@ -154,7 +154,7 @@ def draw_detection(
 
 
 def save_images_with_boxes(
-    test_dataloader: torch.utils.data.DataLoader,
+    dataloader: torch.utils.data.DataLoader,
     trained_model: torch.nn.Module,
     output_dir: str,
     score_threshold: float,
@@ -163,12 +163,13 @@ def save_images_with_boxes(
 
     Parameters
     ----------
-    test_dataloader : DataLoader
-        DataLoader for the test dataset.
+    dataloader : DataLoader
+        DataLoader with the images to save.
     trained_model : torch.nn.Module
         The trained object detection model.
     output_dir : str
-        Directory to save the images with bounding boxes.
+        Path to directory to save the images with bounding boxes.
+        The directory name will be added a timestamp.
     score_threshold : float
         Threshold for object detection.
 
@@ -186,14 +187,14 @@ def save_images_with_boxes(
     trained_model.to(device)
     trained_model.eval()
 
-    if not output_dir:
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        output_dir = f"evaluation_output_{timestamp}"
+    # set output directory
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    output_dir = f"{output_dir}_{timestamp}"
     os.makedirs(output_dir, exist_ok=True)
 
     with torch.no_grad():
         imgs_id = 0
-        for imgs, annotations in test_dataloader:
+        for imgs, annotations in dataloader:
             imgs_id += 1  # noqa: SIM113
             imgs = list(img.to(device) for img in imgs)
 
diff --git a/tests/test_unit/test_visualization.py b/tests/test_unit/test_visualization.py
index bd6728fe..6bc62639 100644
--- a/tests/test_unit/test_visualization.py
+++ b/tests/test_unit/test_visualization.py
@@ -151,8 +151,8 @@ def test_draw_detection(annotations, detections):
 
 
 @pytest.mark.parametrize(
-    "output_dir_name, expected_dir_name",
-    [("output", r"^output$"), ("", r"^results_\d{8}_\d{6}$")],
+    "output_dir_name",
+    ["output", "evaluation_output"],
 )
 @pytest.mark.parametrize(
     "detections",
@@ -176,7 +176,10 @@ def test_draw_detection(annotations, detections):
 @patch("crabs.detector.utils.visualization.cv2.imwrite")
 @patch("crabs.detector.utils.visualization.os.makedirs")
 def test_save_images_with_boxes(
-    mock_makedirs, mock_imwrite, detections, output_dir_name, expected_dir_name
+    mock_makedirs,
+    mock_imwrite,
+    detections,
+    output_dir_name,
 ):
     trained_model = MagicMock()
     test_dataloader = MagicMock()
@@ -190,7 +193,9 @@ def test_save_images_with_boxes(
     )
 
     # extract and check first positional argument to (mocked) os.makedirs
+    output_dir_regexp = re.compile(rf"{output_dir_name}_\d{{8}}_\d{{6}}$")
     input_path_makedirs = mock_makedirs.call_args[0][0]
-    assert re.match(expected_dir_name, input_path_makedirs)
+    assert output_dir_regexp.match(input_path_makedirs)
 
+    # should be called as many times as batches in the dataloader
     assert mock_imwrite.call_count == len(test_dataloader)

From bcd46e68c90e93785576a6f191e891bd49f4f465 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 31 Oct 2024 14:25:22 +0000
Subject: [PATCH 15/21] Evaluate on the validation split by default, and
 optionally on the test split

---
 README.md                        |  2 +-
 crabs/detector/evaluate_model.py | 43 ++++++++++++++++++++++----------
 2 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index 7cb75563..63d7b3c1 100644
--- a/README.md
+++ b/README.md
@@ -118,7 +118,7 @@ evaluate-detector --trained_model_path <path-to-ckpt-file>
 
 This command assumes the trained detector model (a `.ckpt` checkpoint file) is saved in an MLflow database structure. That is, the checkpoint is assumed to be under a `checkpoints` directory, which in turn should be under a `<mlflow-experiment-hash>/<mlflow-run-hash>` directory. This will be the case if the model has been trained using the `train-detector` command.
 
-The `evaluate-detector` command will print to screen the average precision and average recall of the detector on the test set. It will also log those metrics to the MLflow database, along with the hyperparameters of the evaluation job. To visualise the MLflow summary of the evaluation job, run:
+The `evaluate-detector` command will print to screen the average precision and average recall of the detector on the validation set by default. To evaluate the model on the test set instead, use the `--use_test_set` flag. The command will also log those performance metrics to the MLflow database, along with the hyperparameters of the evaluation job. To visualise the MLflow summary of the evaluation job, run:
 ```
 mlflow ui --backend-store-uri file:///<path-to-ml-runs>
 ```
diff --git a/crabs/detector/evaluate_model.py b/crabs/detector/evaluate_model.py
index 519644c3..b5ecb6ef 100644
--- a/crabs/detector/evaluate_model.py
+++ b/crabs/detector/evaluate_model.py
@@ -73,6 +73,7 @@ def __init__(self, args: argparse.Namespace) -> None:
             cli_arg_str="seed_n",
             trained_model_path=self.trained_model_path,
         )
+        self.evaluation_split = "test" if self.args.use_test_set else "val"
 
         # Hardware
         self.accelerator = args.accelerator
@@ -127,6 +128,7 @@ def setup_trainer(self):
                 "dataset/images_dir": self.images_dirs,
                 "dataset/annotation_files": self.annotation_files,
                 "dataset/seed": self.seed_n,
+                "dataset/evaluation_split": self.evaluation_split,
             }
         )
 
@@ -154,25 +156,33 @@ def evaluate_model(self) -> None:
             self.trained_model_path, config=self.config
         )
 
-        # Run testing
-        # TODO: Optionally on validation set?
-        # trainer.validate(
-        # trained_model,
-        # data_module,
-        # )
+        # Evaluate model on either the validation or the test split
         trainer = self.setup_trainer()
-        trainer.test(
-            trained_model,
-            data_module,
-        )
+        if self.args.use_test_set:
+            trainer.test(
+                trained_model,
+                data_module,
+            )
+        else:
+            trainer.validate(
+                trained_model,
+                data_module,
+            )
 
         # Save images with bounding boxes if required
         if self.args.save_frames:
+            # get relevant dataloader
+            if self.args.use_test_set:
+                eval_dataloader = data_module.test_dataloader()
+            else:
+                eval_dataloader = data_module.val_dataloader()
+
             save_images_with_boxes(
-                dataloader=data_module.test_dataloader(),
+                dataloader=eval_dataloader,
                 trained_model=trained_model,
                 output_dir=str(
-                    Path(self.args.frames_output_dir) / "evaluate_output"
+                    Path(self.args.frames_output_dir)
+                    / f"evaluation_output_{self.evaluation_split}"
                 ),
                 score_threshold=self.args.frames_score_threshold,
             )
@@ -252,7 +262,14 @@ def evaluate_parse_args(args):
             "the trained model is used."
         ),
     )
-
+    parser.add_argument(
+        "--use_test_set",
+        action="store_true",
+        help=(
+            "Evaluate the model on the test split, rather than on the default "
+            "validation split."
+        ),
+    )
     parser.add_argument(
         "--accelerator",
         type=str,

From 832a37b7d823ae077780bff620eb6daeb6574358 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 31 Oct 2024 15:18:09 +0000
Subject: [PATCH 16/21] Update readme to add `--save_frames` flag to evaluate
 section

---
 README.md | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 63d7b3c1..e149f4fd 100644
--- a/README.md
+++ b/README.md
@@ -118,12 +118,16 @@ evaluate-detector --trained_model_path <path-to-ckpt-file>
 
 This command assumes the trained detector model (a `.ckpt` checkpoint file) is saved in an MLflow database structure. That is, the checkpoint is assumed to be under a `checkpoints` directory, which in turn should be under a `<mlflow-experiment-hash>/<mlflow-run-hash>` directory. This will be the case if the model has been trained using the `train-detector` command.
 
-The `evaluate-detector` command will print to screen the average precision and average recall of the detector on the validation set by default. To evaluate the model on the test set instead, use the `--use_test_set` flag. The command will also log those performance metrics to the MLflow database, along with the hyperparameters of the evaluation job. To visualise the MLflow summary of the evaluation job, run:
+The `evaluate-detector` command will print to screen the average precision and average recall of the detector on the validation set by default. To evaluate the model on the test set instead, use the `--use_test_set` flag.
+
+The command will also log those performance metrics to the MLflow database, along with the hyperparameters of the evaluation job. To visualise the MLflow summary of the evaluation job, run:
 ```
 mlflow ui --backend-store-uri file:///<path-to-ml-runs>
 ```
 where `<path-to-ml-runs>` is the path to the directory where the MLflow output is.
 
+The evaluated samples can be inspected visually by exporting them using the `--save__frames` flag. In this case, the frames with the predicted and ground-truth bounding boxes are saved in a directory called `evaluation_output_<timestamp>` under the current working directory.
+
 To see the full list of possible arguments to the `evaluate-detector` command, run it with the `--help` flag.
 
 ### Run detector+tracking on a video
@@ -134,7 +138,7 @@ To track crabs in a new video, using a trained detector and a tracker, run the f
 detect-and-track-video --trained_model_path <path-to-ckpt-file> --video_path <path-to-input-video>
 ```
 
-This will produce a `tracking_output_<timestamp>` directory with the output from tracking.
+This will produce a `tracking_output_<timestamp>` directory with the output from tracking under the current working directory.
 
 The tracking output consists of:
 - a .csv file named `<video-name>_tracks.csv`, with the tracked bounding boxes data;

From ff30a86c57e1e9037dcee314fc4ba21603c6744a Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 31 Oct 2024 19:38:59 +0000
Subject: [PATCH 17/21] Simpify CLI help for experiment name

---
 crabs/detector/evaluate_model.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/crabs/detector/evaluate_model.py b/crabs/detector/evaluate_model.py
index b5ecb6ef..58f9a9cf 100644
--- a/crabs/detector/evaluate_model.py
+++ b/crabs/detector/evaluate_model.py
@@ -287,8 +287,6 @@ def evaluate_parse_args(args):
         help=(
             "Name of the experiment in MLflow, under which the current run "
             "will be logged. "
-            "For example, the name of the dataset could be used, to group "
-            "runs using the same data. "
             "By default: <trained_model_mlflow_experiment_name>_evaluation."
         ),
     )

From a8a1fc1f135bdba06986c2e893ac58673aef972f Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 31 Oct 2024 19:39:39 +0000
Subject: [PATCH 18/21] Bash script to evaluate all epoch checkpoints for a run

---
 bash_scripts/run_evaluation_array.sh | 39 ++++++++++++++++++----------
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/bash_scripts/run_evaluation_array.sh b/bash_scripts/run_evaluation_array.sh
index aed3f2b8..b9947c51 100644
--- a/bash_scripts/run_evaluation_array.sh
+++ b/bash_scripts/run_evaluation_array.sh
@@ -10,7 +10,7 @@
 #SBATCH -e slurm_array.%A-%a.%N.err
 #SBATCH --mail-type=ALL
 #SBATCH --mail-user=s.minano@ucl.ac.uk
-#SBATCH --array=0-2%3
+#SBATCH --array=0-5%3
 
 
 # NOTE on SBATCH command for array jobs
@@ -21,7 +21,7 @@
 # Source bashrc
 # ----------------------
 # Otherwise `which python` points to the miniconda module's Python
-source ~/.bashrc
+# source ~/.bashrc
 
 
 # memory
@@ -40,19 +40,23 @@ set -o pipefail  # make the pipe fail if any part of it fails
 # Define variables
 # ----------------------
 
+# mlflow
+MLFLOW_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs-scratch
+
 # List of models to evaluate
-MLFLOW_CKPTS_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs/317777717624044570/fe9a6c2f491a4496aade5034c75316cc/checkpoints
+MLFLOW_CKPTS_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs/317777717624044570/7a6d5551ca974d578a293928d6385d5a/checkpoints
 LIST_CKPT_FILES=("$MLFLOW_CKPTS_FOLDER"/*.ckpt)
 
 # selected model
 CKPT_PATH=${LIST_CKPT_FILES[${SLURM_ARRAY_TASK_ID}]}
 
-# destination mlflow folder
-# EXPERIMENT_NAME="Sept2023" ----> get from training job
-MLFLOW_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs
+# select whether to evaluate on the validation set or on the
+# test set
+EVALUATION_SPLIT=validation
 
 # version of the codebase
-GIT_BRANCH=main
+# GIT_BRANCH=main-------------------------------------------
+GIT_BRANCH=smg/eval-bash-script-cluster
 
 # --------------------
 # Check inputs
@@ -71,7 +75,7 @@ module load miniconda
 
 # Define a environment for each job in the
 # temporary directory of the compute node
-ENV_NAME=crabs-dev-$SPLIT_SEED-$SLURM_ARRAY_JOB_ID
+ENV_NAME=crabs-dev-$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID
 ENV_PREFIX=$TMPDIR/$ENV_NAME
 
 # create environment
@@ -81,7 +85,7 @@ conda create \
     python=3.10
 
 # activate environment
-conda activate $ENV_PREFIX
+source activate $ENV_PREFIX
 
 # install crabs package in virtual env
 python -m pip install git+https://github.com/SainsburyWellcomeCentre/crabs-exploration.git@$GIT_BRANCH
@@ -105,12 +109,21 @@ echo $(nvidia-smi --query-gpu=name,memory.total,memory.free,memory.used --format
 echo "-----"
 
 
-# -------------------
-# Run training script
-# -------------------
-echo "Evaluating trained model at $CKPT_PATH: "
+# -------------------------
+# Run evaluation script
+# -------------------------
+echo "Evaluating trained model at $CKPT_PATH on $EVALUATION_SPLIT set: "
+
+# conditionally append flag to command
+if [ "$EVALUATION_SPLIT" = "validation" ]; then
+    USE_TEST_SET_FLAG=""
+elif [ "$EVALUATION_SPLIT" = "test" ]; then
+    USE_TEST_SET_FLAG="--use_test_set"
+fi
+
 evaluate-detector  \
  --trained_model_path $CKPT_PATH \
  --accelerator gpu \
  --mlflow_folder $MLFLOW_FOLDER \
+ $USE_TEST_SET_FLAG
 echo "-----"

From 57c80de9906db09dd856ad806cfc80493f43f96a Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 31 Oct 2024 19:40:23 +0000
Subject: [PATCH 19/21] Bash scrip to evaluate all last checkpoints of all runs
 in an experiment

---
 bash_scripts/run_evaluation_array.sh | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/bash_scripts/run_evaluation_array.sh b/bash_scripts/run_evaluation_array.sh
index b9947c51..968b66af 100644
--- a/bash_scripts/run_evaluation_array.sh
+++ b/bash_scripts/run_evaluation_array.sh
@@ -43,9 +43,23 @@ set -o pipefail  # make the pipe fail if any part of it fails
 # mlflow
 MLFLOW_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs-scratch
 
+# ------------------
 # List of models to evaluate
-MLFLOW_CKPTS_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs/317777717624044570/7a6d5551ca974d578a293928d6385d5a/checkpoints
-LIST_CKPT_FILES=("$MLFLOW_CKPTS_FOLDER"/*.ckpt)
+# Option 1: to evaluate all epoch-checkpoints of an MLflow run,
+# uncomment the following two lines:
+# MLFLOW_CKPTS_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs/317777717624044570/7a6d5551ca974d578a293928d6385d5a/checkpoints
+# LIST_CKPT_FILES=("$MLFLOW_CKPTS_FOLDER"/*.ckpt)
+
+# Option 2: to evaluate all 'last' checkpoints of an MLflow experiment,
+# uncomment the following two lines:
+MLFLOW_CKPTS_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs-scratch/763954951706829194/*/checkpoints
+mapfile -t LIST_CKPT_FILES < <(find $MLFLOW_CKPTS_FOLDER -type f -name "last.ckpt")
+
+# Can I use one option only?
+# MLFLOW_CKPTS_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs-scratch/763954951706829194/*/checkpoints
+# CKPT_FILENAME=*.ckpt # or last.ckpt
+# mapfile -t LIST_CKPT_FILES < <(find "$MLFLOW_CKPTS_FOLDER" -type f -name "$CKPT_FILENAME")
+#-------------------
 
 # selected model
 CKPT_PATH=${LIST_CKPT_FILES[${SLURM_ARRAY_TASK_ID}]}

From a0f7f84ff23eb4953c00190c4fdda896ed84caf9 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 31 Oct 2024 20:06:20 +0000
Subject: [PATCH 20/21] Using a common syntax for both cases

---
 bash_scripts/run_evaluation_array.sh | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/bash_scripts/run_evaluation_array.sh b/bash_scripts/run_evaluation_array.sh
index 968b66af..f6dc5b2b 100644
--- a/bash_scripts/run_evaluation_array.sh
+++ b/bash_scripts/run_evaluation_array.sh
@@ -45,20 +45,20 @@ MLFLOW_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs-scratch
 
 # ------------------
 # List of models to evaluate
-# Option 1: to evaluate all epoch-checkpoints of an MLflow run,
-# uncomment the following two lines:
+# Example 1: to evaluate all epoch-checkpoints of an MLflow run,
 # MLFLOW_CKPTS_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs/317777717624044570/7a6d5551ca974d578a293928d6385d5a/checkpoints
-# LIST_CKPT_FILES=("$MLFLOW_CKPTS_FOLDER"/*.ckpt)
+# CKPT_FILENAME=*.ckpt
 
-# Option 2: to evaluate all 'last' checkpoints of an MLflow experiment,
-# uncomment the following two lines:
-MLFLOW_CKPTS_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs-scratch/763954951706829194/*/checkpoints
-mapfile -t LIST_CKPT_FILES < <(find $MLFLOW_CKPTS_FOLDER -type f -name "last.ckpt")
-
-# Can I use one option only?
+# Example 2: to evaluate all 'last' checkpoints of an MLflow experiment,
 # MLFLOW_CKPTS_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs-scratch/763954951706829194/*/checkpoints
-# CKPT_FILENAME=*.ckpt # or last.ckpt
-# mapfile -t LIST_CKPT_FILES < <(find "$MLFLOW_CKPTS_FOLDER" -type f -name "$CKPT_FILENAME")
+# CKPT_FILENAME=last.ckpt
+
+# NOTE: if the paths have spaces, but quotes around the string, but stopping and re-starting at the wildcard.
+# e.g.: "/ceph/zoo/users/sminano/ml-runs-all/ml-runs-scratch/763954951706829194/"*"/checkpoints"
+
+MLFLOW_CKPTS_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs/317777717624044570/7a6d5551ca974d578a293928d6385d5a/checkpoints
+CKPT_FILENAME=*.ckpt
+mapfile -t LIST_CKPT_FILES < <(find $MLFLOW_CKPTS_FOLDER -type f -name $CKPT_FILENAME)
 #-------------------
 
 # selected model

From 5847524ef876cec6f67298fd4c6c867f0fa6d964 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 31 Oct 2024 20:23:38 +0000
Subject: [PATCH 21/21] Add double quotes

---
 bash_scripts/run_evaluation_array.sh | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/bash_scripts/run_evaluation_array.sh b/bash_scripts/run_evaluation_array.sh
index f6dc5b2b..05c54ce3 100644
--- a/bash_scripts/run_evaluation_array.sh
+++ b/bash_scripts/run_evaluation_array.sh
@@ -10,7 +10,7 @@
 #SBATCH -e slurm_array.%A-%a.%N.err
 #SBATCH --mail-type=ALL
 #SBATCH --mail-user=s.minano@ucl.ac.uk
-#SBATCH --array=0-5%3
+#SBATCH --array=0-4%3
 
 
 # NOTE on SBATCH command for array jobs
@@ -53,11 +53,12 @@ MLFLOW_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs-scratch
 # MLFLOW_CKPTS_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs-scratch/763954951706829194/*/checkpoints
 # CKPT_FILENAME=last.ckpt
 
-# NOTE: if the paths have spaces, but quotes around the string, but stopping and re-starting at the wildcard.
+# NOTE: if the paths have spaces, put quotes around the string but stopping and re-starting at the wildcard.
 # e.g.: "/ceph/zoo/users/sminano/ml-runs-all/ml-runs-scratch/763954951706829194/"*"/checkpoints"
+# e.g.: "checkpoint-epoch="*".ckpt"
 
-MLFLOW_CKPTS_FOLDER=/ceph/zoo/users/sminano/ml-runs-all/ml-runs/317777717624044570/7a6d5551ca974d578a293928d6385d5a/checkpoints
-CKPT_FILENAME=*.ckpt
+MLFLOW_CKPTS_FOLDER="/ceph/zoo/users/sminano/ml-runs-all/ml-runs/317777717624044570/7a6d5551ca974d578a293928d6385d5a/checkpoints"
+CKPT_FILENAME="checkpoint-epoch="*".ckpt"
 mapfile -t LIST_CKPT_FILES < <(find $MLFLOW_CKPTS_FOLDER -type f -name $CKPT_FILENAME)
 #-------------------