
Evaluate on the validation split by default, and optionally on the test split
sfmig committed Oct 31, 2024
1 parent 3744021 commit bcd46e6
Showing 2 changed files with 31 additions and 14 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -118,7 +118,7 @@ evaluate-detector --trained_model_path <path-to-ckpt-file>

This command assumes the trained detector model (a `.ckpt` checkpoint file) is saved in an MLflow database structure. That is, the checkpoint is assumed to be under a `checkpoints` directory, which in turn should be under a `<mlflow-experiment-hash>/<mlflow-run-hash>` directory. This will be the case if the model has been trained using the `train-detector` command.

-The `evaluate-detector` command will print to screen the average precision and average recall of the detector on the test set. It will also log those metrics to the MLflow database, along with the hyperparameters of the evaluation job. To visualise the MLflow summary of the evaluation job, run:
+The `evaluate-detector` command will print to screen the average precision and average recall of the detector on the validation set by default. To evaluate the model on the test set instead, use the `--use_test_set` flag. The command will also log those performance metrics to the MLflow database, along with the hyperparameters of the evaluation job. To visualise the MLflow summary of the evaluation job, run:
```
mlflow ui --backend-store-uri file:///<path-to-ml-runs>
```
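
For example, a minimal sketch of the two evaluation modes, using the same placeholder checkpoint path as above:
```
# evaluate on the validation split (default)
evaluate-detector --trained_model_path <path-to-ckpt-file>

# evaluate on the test split
evaluate-detector --trained_model_path <path-to-ckpt-file> --use_test_set
```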
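For reference, the MLflow layout assumed above (checkpoint under a `checkpoints` directory, inside `<mlflow-experiment-hash>/<mlflow-run-hash>`) would look roughly like this; the checkpoint filename shown is only a hypothetical example:
```
<path-to-ml-runs>/
└── <mlflow-experiment-hash>/
    └── <mlflow-run-hash>/
        └── checkpoints/
            └── last.ckpt   # hypothetical filename
```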
43 changes: 30 additions & 13 deletions crabs/detector/evaluate_model.py
@@ -73,6 +73,7 @@ def __init__(self, args: argparse.Namespace) -> None:
cli_arg_str="seed_n",
trained_model_path=self.trained_model_path,
)
self.evaluation_split = "test" if self.args.use_test_set else "val"

# Hardware
self.accelerator = args.accelerator
@@ -127,6 +128,7 @@ def setup_trainer(self):
"dataset/images_dir": self.images_dirs,
"dataset/annotation_files": self.annotation_files,
"dataset/seed": self.seed_n,
"dataset/evaluation_split": self.evaluation_split,
}
)

@@ -154,25 +156,33 @@ def evaluate_model(self) -> None:
            self.trained_model_path, config=self.config
        )

-        # Run testing
-        # TODO: Optionally on validation set?
-        # trainer.validate(
-        #     trained_model,
-        #     data_module,
-        # )
+        # Evaluate model on either the validation or the test split
        trainer = self.setup_trainer()
-        trainer.test(
-            trained_model,
-            data_module,
-        )
+        if self.args.use_test_set:
+            trainer.test(
+                trained_model,
+                data_module,
+            )
+        else:
+            trainer.validate(
+                trained_model,
+                data_module,
+            )

        # Save images with bounding boxes if required
        if self.args.save_frames:
+            # get relevant dataloader
+            if self.args.use_test_set:
+                eval_dataloader = data_module.test_dataloader()
+            else:
+                eval_dataloader = data_module.val_dataloader()
+
            save_images_with_boxes(
-                dataloader=data_module.test_dataloader(),
+                dataloader=eval_dataloader,
                trained_model=trained_model,
                output_dir=str(
-                    Path(self.args.frames_output_dir) / "evaluate_output"
+                    Path(self.args.frames_output_dir)
+                    / f"evaluation_output_{self.evaluation_split}"
                ),
                score_threshold=self.args.frames_score_threshold,
            )
@@ -252,7 +262,14 @@ def evaluate_parse_args(args):
"the trained model is used."
),
)

parser.add_argument(
"--use_test_set",
action="store_true",
help=(
"Evaluate the model on the test split, rather than on the default "
"validation split."
),
)
parser.add_argument(
"--accelerator",
type=str,
