Add MLflow logs to evaluate job (#220)
* Define mlflow experiment and run name with reference to the trained model

* First draft bash script

* add script to select best model

* Add checkpoint path to evaluation run name

* Fix ruff

* Remove select best model empty script

* Log dataset info and trained model info to mlflow

* Print MLflow details to screen

* Small edits to comments

* Rename output folder for evaluation results

* Move run_name assignment to constructor and remove option of defining it based on training job run name

* Add name of checkpoint file to MLflow logs

* Remove option to define run name from train job run name from evaluate utils

* Adapt test to generalise to other output directory names (still not fixed for batch size > 1, see PR 232)

* Evaluate on the validation split by default, and optionally on the test split

* Update readme to add `--save_frames` flag to evaluate section

* Simplify CLI help for experiment name

* Remove bash script (see PR 236)

* Clarify CLI help
sfmig authored Nov 1, 2024
1 parent 7105c4c commit e7a2da1
Showing 5 changed files with 148 additions and 57 deletions.
8 changes: 6 additions & 2 deletions README.md
@@ -118,12 +118,16 @@ evaluate-detector --trained_model_path <path-to-ckpt-file>

This command assumes the trained detector model (a `.ckpt` checkpoint file) is saved in an MLflow database structure. That is, the checkpoint is assumed to be under a `checkpoints` directory, which in turn should be under a `<mlflow-experiment-hash>/<mlflow-run-hash>` directory. This will be the case if the model has been trained using the `train-detector` command.
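
For reference, a minimal sketch of the expected layout is shown below (the `ml-runs` folder name and the `last.ckpt` filename are illustrative only):
```
ml-runs/
└── <mlflow-experiment-hash>/
    └── <mlflow-run-hash>/
        └── checkpoints/
            └── last.ckpt
```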

The `evaluate-detector` command will print to screen the average precision and average recall of the detector on the test set. It will also log those metrics to the MLflow database, along with the hyperparameters of the evaluation job. To visualise the MLflow summary of the evaluation job, run:
The `evaluate-detector` command will print to screen the average precision and average recall of the detector on the validation set by default. To evaluate the model on the test set instead, use the `--use_test_set` flag.

The command will also log those performance metrics to the MLflow database, along with the hyperparameters of the evaluation job. To visualise the MLflow summary of the evaluation job, run:
```
mlflow ui --backend-store-uri file:///<path-to-ml-runs>
```
where `<path-to-ml-runs>` is the path to the directory where the MLflow output is.
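
For instance, if the MLflow output lives in the default `ml-runs` folder under the current working directory, the command might look like this (the path is shown only as an illustration):
```
mlflow ui --backend-store-uri file:///$(pwd)/ml-runs
```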

The evaluated samples can be inspected visually by exporting them using the `--save_frames` flag. In this case, the frames with the predicted and ground-truth bounding boxes are saved in a directory called `evaluation_output_<timestamp>` under the current working directory.
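
For example, to evaluate a trained model on the test split and also export the annotated frames, a call could look like the following (the checkpoint path is a placeholder):
```
evaluate-detector --trained_model_path <path-to-ckpt-file> --use_test_set --save_frames
```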

To see the full list of possible arguments to the `evaluate-detector` command, run it with the `--help` flag.

### Run detector+tracking on a video
@@ -134,7 +138,7 @@ To track crabs in a new video, using a trained detector and a tracker, run the f
detect-and-track-video --trained_model_path <path-to-ckpt-file> --video_path <path-to-input-video>
```

This will produce a `tracking_output_<timestamp>` directory with the output from tracking.
This will produce a `tracking_output_<timestamp>` directory with the output from tracking under the current working directory.

The tracking output consists of:
- a .csv file named `<video-name>_tracks.csv`, with the tracked bounding boxes data;
144 changes: 105 additions & 39 deletions crabs/detector/evaluate_model.py
@@ -4,6 +4,7 @@
import logging
import os
import sys
from pathlib import Path

import lightning
import torch
@@ -20,9 +21,13 @@
get_cli_arg_from_ckpt,
get_config_from_ckpt,
get_img_directories_from_ckpt,
get_mlflow_experiment_name_from_ckpt,
get_mlflow_parameters_from_ckpt,
)
from crabs.detector.utils.visualization import save_images_with_boxes

logging.getLogger().setLevel(logging.INFO)


class DetectorEvaluate:
"""Interface for evaluating an object detector.
@@ -39,10 +44,17 @@ def __init__(self, args: argparse.Namespace) -> None:
# CLI inputs
self.args = args

# trained model
# trained model data
self.trained_model_path = args.trained_model_path
trained_model_params = get_mlflow_parameters_from_ckpt(
self.trained_model_path
)
self.trained_model_run_name = trained_model_params["run_name"]
self.trained_model_expt_name = trained_model_params[
"cli_args/experiment_name"
]

# config: retreieve from ckpt if not passed as CLI argument
# config: retrieve from ckpt if not passed as CLI argument
self.config_file = args.config_file
self.config = get_config_from_ckpt(
config_file=self.config_file,
@@ -61,28 +73,38 @@ def __init__(self, args: argparse.Namespace) -> None:
cli_arg_str="seed_n",
trained_model_path=self.trained_model_path,
)
self.evaluation_split = "test" if self.args.use_test_set else "val"

# Hardware
self.accelerator = args.accelerator

# MLflow
self.experiment_name = args.experiment_name
# MLflow experiment name and run name
self.experiment_name = get_mlflow_experiment_name_from_ckpt(
args=self.args, trained_model_path=self.trained_model_path
)
self.run_name = set_mlflow_run_name()
self.mlflow_folder = args.mlflow_folder

# Debugging
# Debugging settings
self.fast_dev_run = args.fast_dev_run
self.limit_test_batches = args.limit_test_batches

# Log dataset information to screen
logging.info("Dataset")
logging.info(f"Images directories: {self.images_dirs}")
logging.info(f"Annotation files: {self.annotation_files}")
logging.info(f"Seed: {self.seed_n}")
logging.info("---------------------------------")

# Log MLflow information to screen
logging.info("MLflow logs for current job")
logging.info(f"Experiment name: {self.experiment_name}")
logging.info(f"Run name: {self.run_name}")
logging.info(f"Folder: {Path(self.mlflow_folder).resolve()}")
logging.info("---------------------------------")

def setup_trainer(self):
"""Set up trainer object with logging for testing."""
# Assign run name
self.run_name = set_mlflow_run_name()

# Setup logger
mlf_logger = setup_mlflow_logger(
experiment_name=self.experiment_name,
@@ -91,6 +113,25 @@ def setup_trainer(self):
cli_args=self.args,
)

# Add trained model section to MLflow hyperparameters
mlf_logger.log_hyperparams(
{
"trained_model/experiment_name": self.trained_model_expt_name,
"trained_model/run_name": self.trained_model_run_name,
"trained_model/ckpt_file": Path(self.trained_model_path).name,
}
)

# Add dataset section to MLflow hyperparameters
mlf_logger.log_hyperparams(
{
"dataset/images_dir": self.images_dirs,
"dataset/annotation_files": self.annotation_files,
"dataset/seed": self.seed_n,
"dataset/evaluation_split": self.evaluation_split,
}
)

# Return trainer linked to logger
return lightning.Trainer(
accelerator=self.accelerator,
@@ -107,26 +148,42 @@ def evaluate_model(self) -> None:
list_annotation_files=self.annotation_files,
split_seed=self.seed_n,
config=self.config,
no_data_augmentation=True,
)

# Get trained model
trained_model = FasterRCNN.load_from_checkpoint(
self.trained_model_path, config=self.config
)

# Run testing
# Evaluate model on either the validation or the test split
trainer = self.setup_trainer()
trainer.test(
trained_model,
data_module,
)
if self.args.use_test_set:
trainer.test(
trained_model,
data_module,
)
else:
trainer.validate(
trained_model,
data_module,
)

# Save images if required
# Save images with bounding boxes if required
if self.args.save_frames:
# get relevant dataloader
if self.args.use_test_set:
eval_dataloader = data_module.test_dataloader()
else:
eval_dataloader = data_module.val_dataloader()

save_images_with_boxes(
test_dataloader=data_module.test_dataloader(),
dataloader=eval_dataloader,
trained_model=trained_model,
output_dir=self.args.frames_output_dir,
output_dir=str(
Path(self.args.frames_output_dir)
/ f"evaluation_output_{self.evaluation_split}"
),
score_threshold=self.args.frames_score_threshold,
)

@@ -205,7 +262,14 @@ def evaluate_parse_args(args):
"the trained model is used."
),
)

parser.add_argument(
"--use_test_set",
action="store_true",
help=(
"Evaluate the model on the test split, rather than on the default "
"validation split."
),
)
parser.add_argument(
"--accelerator",
type=str,
@@ -220,35 +284,20 @@
parser.add_argument(
"--experiment_name",
type=str,
default="Sept2023_evaluation",
help=(
"Name of the experiment in MLflow, under which the current run "
"will be logged. "
"For example, the name of the dataset could be used, to group "
"runs using the same data. "
"Default: Sept2023_evaluation"
),
)
parser.add_argument(
"--fast_dev_run",
action="store_true",
help="Debugging option to run training for one batch and one epoch",
)
parser.add_argument(
"--limit_test_batches",
type=float,
default=1.0,
help=(
"Debugging option to run training on a fraction of "
"the training set."
"Default: 1.0 (all the training set)"
"By default: <trained_model_mlflow_experiment_name>_evaluation."
),
)
parser.add_argument(
"--mlflow_folder",
type=str,
default="./ml-runs",
help=("Path to MLflow directory. Default: ./ml-runs"),
help=(
"Path to MLflow directory where to log the evaluation data. "
"Default: 'ml-runs' directory under the current working directory."
),
)
parser.add_argument(
"--save_frames",
@@ -269,12 +318,29 @@ def evaluate_parse_args(args):
type=str,
default="",
help=(
"Output directory for the exported frames. "
"Output directory for the evaluated frames, with bounding boxes. "
"Predicted boxes are plotted in red, and ground-truth boxes in "
"green. "
"By default, the frames are saved in a "
"`results_<timestamp> folder "
"`evaluation_output_<timestamp> folder "
"under the current working directory."
),
)
parser.add_argument(
"--fast_dev_run",
action="store_true",
help="Debugging option to run training for one batch and one epoch",
)
parser.add_argument(
"--limit_test_batches",
type=float,
default=1.0,
help=(
"Debugging option to run training on a fraction of "
"the training set."
"Default: 1.0 (all the training set)"
),
)
return parser.parse_args(args)


23 changes: 19 additions & 4 deletions crabs/detector/utils/evaluate.py
@@ -2,7 +2,6 @@

import argparse
import ast
import logging
import sys
from pathlib import Path

Expand All @@ -14,8 +13,6 @@
prep_img_directories,
)

logging.basicConfig(level=logging.INFO)


def compute_precision_recall(class_stats: dict) -> tuple[float, float, dict]:
"""Compute precision and recall.
@@ -143,6 +140,7 @@ def get_mlflow_parameters_from_ckpt(trained_model_path: str) -> dict:
# get parameters of the run
run = mlrun_client.get_run(ckpt_runID)
params = run.data.params
params["run_name"] = run.info.run_name

return params

@@ -192,7 +190,7 @@ def get_config_from_ckpt(config_file: str, trained_model_path: str) -> dict:
def get_cli_arg_from_ckpt(
args: argparse.Namespace, cli_arg_str: str, trained_model_path: str
):
"""Get CLI argument from checkpoint if not in args."""
"""Get CLI argument from checkpoint if not passed as CLI argument."""
if getattr(args, cli_arg_str):
cli_arg = getattr(args, cli_arg_str)
else:
@@ -242,3 +240,20 @@ def get_annotation_files_from_ckpt(
input_annotation_files, dataset_dirs
)
return annotation_files


def get_mlflow_experiment_name_from_ckpt(
args: argparse.Namespace, trained_model_path: str
) -> str:
"""Define MLflow experiment name from the training job.
Only used if the experiment name is not passed via CLI.
"""
if args.experiment_name:
experiment_name = args.experiment_name
else:
params = get_mlflow_parameters_from_ckpt(trained_model_path)
trained_model_expt_name = params["cli_args/experiment_name"]
experiment_name = trained_model_expt_name + "_evaluation"

return experiment_name
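
A minimal usage sketch of the new helper, assuming a checkpoint logged by `train-detector` (the paths and values below are placeholders, not from this commit):
```python
from argparse import Namespace

from crabs.detector.utils.evaluate import get_mlflow_experiment_name_from_ckpt

# An empty --experiment_name falls back to the trained model's experiment name
args = Namespace(experiment_name="")
experiment_name = get_mlflow_experiment_name_from_ckpt(
    args=args,
    trained_model_path="ml-runs/<experiment-hash>/<run-hash>/checkpoints/last.ckpt",
)
# e.g. "Sept2023_evaluation" if the model was trained under the "Sept2023" experiment
```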
17 changes: 9 additions & 8 deletions crabs/detector/utils/visualization.py
@@ -154,7 +154,7 @@ def draw_detection(


def save_images_with_boxes(
test_dataloader: torch.utils.data.DataLoader,
dataloader: torch.utils.data.DataLoader,
trained_model: torch.nn.Module,
output_dir: str,
score_threshold: float,
@@ -163,12 +163,13 @@
Parameters
----------
test_dataloader : DataLoader
DataLoader for the test dataset.
dataloader : DataLoader
DataLoader with the images to save.
trained_model : torch.nn.Module
The trained object detection model.
output_dir : str
Directory to save the images with bounding boxes.
Path to directory to save the images with bounding boxes.
        A timestamp will be appended to the directory name.
score_threshold : float
Threshold for object detection.
@@ -186,14 +187,14 @@
trained_model.to(device)
trained_model.eval()

if not output_dir:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"results_{timestamp}"
# set output directory
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"{output_dir}_{timestamp}"
os.makedirs(output_dir, exist_ok=True)

with torch.no_grad():
imgs_id = 0
for imgs, annotations in test_dataloader:
for imgs, annotations in dataloader:
imgs_id += 1 # noqa: SIM113
imgs = list(img.to(device) for img in imgs)
