Add MLflow logs to evaluate job (#220)
* Define mlflow experiment and run name with reference to the trained model

* First draft bash script

* add script to select best model

* Add checkpoint path to evaluation run name

* Fix ruff

* Remove select best model empty script

* Log dataset info and trained model info to mlflow

* Print MLflow details to screen

* Small edits to comments

* Rename output folder for evaluation results

* Move run_name assignment to constructor and remove option of defining it based on training job run name

* Add name of checkpoint file to MLflow logs

* Remove option to define run name from train job run name from evaluate utils

* Adapt test to generalise to other output directory names (still not fixed for batch size > 1, see PR 232)

* Evaluate on the validation split by default, and optionally on the test split

* Update readme to add `--save_frames` flag to evaluate section

* Simplify CLI help for experiment name

* Remove bash script (see PR 236)

* Clarify CLI help
sfmig authored Nov 1, 2024
1 parent 7105c4c commit e7a2da1
Showing 5 changed files with 148 additions and 57 deletions.
8 changes: 6 additions & 2 deletions README.md
@@ -118,12 +118,16 @@ evaluate-detector --trained_model_path <path-to-ckpt-file>

This command assumes the trained detector model (a `.ckpt` checkpoint file) is saved in an MLflow database structure. That is, the checkpoint is assumed to be under a `checkpoints` directory, which in turn should be under a `<mlflow-experiment-hash>/<mlflow-run-hash>` directory. This will be the case if the model has been trained using the `train-detector` command.
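
For reference, a minimal sketch of the expected layout is shown below (the `ml-runs` folder name and the `last.ckpt` filename are illustrative only):
```
ml-runs/
└── <mlflow-experiment-hash>/
    └── <mlflow-run-hash>/
        └── checkpoints/
            └── last.ckpt
```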

The `evaluate-detector` command will print to screen the average precision and average recall of the detector on the test set. It will also log those metrics to the MLflow database, along with the hyperparameters of the evaluation job. To visualise the MLflow summary of the evaluation job, run:
The `evaluate-detector` command will print to screen the average precision and average recall of the detector on the validation set by default. To evaluate the model on the test set instead, use the `--use_test_set` flag.

The command will also log those performance metrics to the MLflow database, along with the hyperparameters of the evaluation job. To visualise the MLflow summary of the evaluation job, run:
```
mlflow ui --backend-store-uri file:///<path-to-ml-runs>
```
where `<path-to-ml-runs>` is the path to the directory where the MLflow output is.
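
For instance, if the MLflow output lives in the default `ml-runs` folder under the current working directory, the command might look like this (the path is shown only as an illustration):
```
mlflow ui --backend-store-uri file:///$(pwd)/ml-runs
```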

The evaluated samples can be inspected visually by exporting them using the `--save_frames` flag. In this case, the frames with the predicted and ground-truth bounding boxes are saved in a directory called `evaluation_output_<timestamp>` under the current working directory.
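
For example, to evaluate a trained model on the test split and also export the annotated frames, a call could look like the following (the checkpoint path is a placeholder):
```
evaluate-detector --trained_model_path <path-to-ckpt-file> --use_test_set --save_frames
```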

To see the full list of possible arguments to the `evaluate-detector` command, run it with the `--help` flag.

### Run detector+tracking on a video
@@ -134,7 +138,7 @@ To track crabs in a new video, using a trained detector and a tracker, run the f
detect-and-track-video --trained_model_path <path-to-ckpt-file> --video_path <path-to-input-video>
```

This will produce a `tracking_output_<timestamp>` directory with the output from tracking.
This will produce a `tracking_output_<timestamp>` directory with the output from tracking under the current working directory.

The tracking output consists of:
- a .csv file named `<video-name>_tracks.csv`, with the tracked bounding boxes data;
144 changes: 105 additions & 39 deletions crabs/detector/evaluate_model.py
@@ -4,6 +4,7 @@
import logging
import os
import sys
from pathlib import Path

import lightning
import torch
@@ -20,9 +21,13 @@
get_cli_arg_from_ckpt,
get_config_from_ckpt,
get_img_directories_from_ckpt,
get_mlflow_experiment_name_from_ckpt,
get_mlflow_parameters_from_ckpt,
)
from crabs.detector.utils.visualization import save_images_with_boxes

logging.getLogger().setLevel(logging.INFO)


class DetectorEvaluate:
"""Interface for evaluating an object detector.
@@ -39,10 +44,17 @@ def __init__(self, args: argparse.Namespace) -> None:
# CLI inputs
self.args = args

# trained model
# trained model data
self.trained_model_path = args.trained_model_path
trained_model_params = get_mlflow_parameters_from_ckpt(
self.trained_model_path
)
self.trained_model_run_name = trained_model_params["run_name"]
self.trained_model_expt_name = trained_model_params[
"cli_args/experiment_name"
]

# config: retreieve from ckpt if not passed as CLI argument
# config: retrieve from ckpt if not passed as CLI argument
self.config_file = args.config_file
self.config = get_config_from_ckpt(
config_file=self.config_file,
@@ -61,28 +73,38 @@ def __init__(self, args: argparse.Namespace) -> None:
cli_arg_str="seed_n",
trained_model_path=self.trained_model_path,
)
self.evaluation_split = "test" if self.args.use_test_set else "val"

# Hardware
self.accelerator = args.accelerator

# MLflow
self.experiment_name = args.experiment_name
# MLflow experiment name and run name
self.experiment_name = get_mlflow_experiment_name_from_ckpt(
args=self.args, trained_model_path=self.trained_model_path
)
self.run_name = set_mlflow_run_name()
self.mlflow_folder = args.mlflow_folder

# Debugging
# Debugging settings
self.fast_dev_run = args.fast_dev_run
self.limit_test_batches = args.limit_test_batches

# Log dataset information to screen
logging.info("Dataset")
logging.info(f"Images directories: {self.images_dirs}")
logging.info(f"Annotation files: {self.annotation_files}")
logging.info(f"Seed: {self.seed_n}")
logging.info("---------------------------------")

# Log MLflow information to screen
logging.info("MLflow logs for current job")
logging.info(f"Experiment name: {self.experiment_name}")
logging.info(f"Run name: {self.run_name}")
logging.info(f"Folder: {Path(self.mlflow_folder).resolve()}")
logging.info("---------------------------------")

def setup_trainer(self):
"""Set up trainer object with logging for testing."""
# Assign run name
self.run_name = set_mlflow_run_name()

# Setup logger
mlf_logger = setup_mlflow_logger(
experiment_name=self.experiment_name,
@@ -91,6 +113,25 @@ def setup_trainer(self):
cli_args=self.args,
)

# Add trained model section to MLflow hyperparameters
mlf_logger.log_hyperparams(
{
"trained_model/experiment_name": self.trained_model_expt_name,
"trained_model/run_name": self.trained_model_run_name,
"trained_model/ckpt_file": Path(self.trained_model_path).name,
}
)

# Add dataset section to MLflow hyperparameters
mlf_logger.log_hyperparams(
{
"dataset/images_dir": self.images_dirs,
"dataset/annotation_files": self.annotation_files,
"dataset/seed": self.seed_n,
"dataset/evaluation_split": self.evaluation_split,
}
)

# Return trainer linked to logger
return lightning.Trainer(
accelerator=self.accelerator,
@@ -107,26 +148,42 @@ def evaluate_model(self) -> None:
list_annotation_files=self.annotation_files,
split_seed=self.seed_n,
config=self.config,
no_data_augmentation=True,
)

# Get trained model
trained_model = FasterRCNN.load_from_checkpoint(
self.trained_model_path, config=self.config
)

# Run testing
# Evaluate model on either the validation or the test split
trainer = self.setup_trainer()
trainer.test(
trained_model,
data_module,
)
if self.args.use_test_set:
trainer.test(
trained_model,
data_module,
)
else:
trainer.validate(
trained_model,
data_module,
)

# Save images if required
# Save images with bounding boxes if required
if self.args.save_frames:
# get relevant dataloader
if self.args.use_test_set:
eval_dataloader = data_module.test_dataloader()
else:
eval_dataloader = data_module.val_dataloader()

save_images_with_boxes(
test_dataloader=data_module.test_dataloader(),
dataloader=eval_dataloader,
trained_model=trained_model,
output_dir=self.args.frames_output_dir,
output_dir=str(
Path(self.args.frames_output_dir)
/ f"evaluation_output_{self.evaluation_split}"
),
score_threshold=self.args.frames_score_threshold,
)

@@ -205,7 +262,14 @@ def evaluate_parse_args(args):
"the trained model is used."
),
)

parser.add_argument(
"--use_test_set",
action="store_true",
help=(
"Evaluate the model on the test split, rather than on the default "
"validation split."
),
)
parser.add_argument(
"--accelerator",
type=str,
@@ -220,35 +284,20 @@
parser.add_argument(
"--experiment_name",
type=str,
default="Sept2023_evaluation",
help=(
"Name of the experiment in MLflow, under which the current run "
"will be logged. "
"For example, the name of the dataset could be used, to group "
"runs using the same data. "
"Default: Sept2023_evaluation"
),
)
parser.add_argument(
"--fast_dev_run",
action="store_true",
help="Debugging option to run training for one batch and one epoch",
)
parser.add_argument(
"--limit_test_batches",
type=float,
default=1.0,
help=(
"Debugging option to run training on a fraction of "
"the training set."
"Default: 1.0 (all the training set)"
"By default: <trained_model_mlflow_experiment_name>_evaluation."
),
)
parser.add_argument(
"--mlflow_folder",
type=str,
default="./ml-runs",
help=("Path to MLflow directory. Default: ./ml-runs"),
help=(
"Path to MLflow directory where to log the evaluation data. "
"Default: 'ml-runs' directory under the current working directory."
),
)
parser.add_argument(
"--save_frames",
@@ -269,12 +318,29 @@ def evaluate_parse_args(args):
type=str,
default="",
help=(
"Output directory for the exported frames. "
"Output directory for the evaluated frames, with bounding boxes. "
"Predicted boxes are plotted in red, and ground-truth boxes in "
"green. "
"By default, the frames are saved in a "
"`results_<timestamp> folder "
"`evaluation_output_<timestamp> folder "
"under the current working directory."
),
)
parser.add_argument(
"--fast_dev_run",
action="store_true",
help="Debugging option to run training for one batch and one epoch",
)
parser.add_argument(
"--limit_test_batches",
type=float,
default=1.0,
help=(
"Debugging option to run training on a fraction of "
"the training set."
"Default: 1.0 (all the training set)"
),
)
return parser.parse_args(args)


23 changes: 19 additions & 4 deletions crabs/detector/utils/evaluate.py
@@ -2,7 +2,6 @@

import argparse
import ast
import logging
import sys
from pathlib import Path

Expand All @@ -14,8 +13,6 @@
prep_img_directories,
)

logging.basicConfig(level=logging.INFO)


def compute_precision_recall(class_stats: dict) -> tuple[float, float, dict]:
"""Compute precision and recall.
@@ -143,6 +140,7 @@ def get_mlflow_parameters_from_ckpt(trained_model_path: str) -> dict:
# get parameters of the run
run = mlrun_client.get_run(ckpt_runID)
params = run.data.params
params["run_name"] = run.info.run_name

return params

@@ -192,7 +190,7 @@ def get_config_from_ckpt(config_file: str, trained_model_path: str) -> dict:
def get_cli_arg_from_ckpt(
args: argparse.Namespace, cli_arg_str: str, trained_model_path: str
):
"""Get CLI argument from checkpoint if not in args."""
"""Get CLI argument from checkpoint if not passed as CLI argument."""
if getattr(args, cli_arg_str):
cli_arg = getattr(args, cli_arg_str)
else:
@@ -242,3 +240,20 @@ def get_annotation_files_from_ckpt(
input_annotation_files, dataset_dirs
)
return annotation_files


def get_mlflow_experiment_name_from_ckpt(
args: argparse.Namespace, trained_model_path: str
) -> str:
"""Define MLflow experiment name from the training job.
Only used if the experiment name is not passed via CLI.
"""
if args.experiment_name:
experiment_name = args.experiment_name
else:
params = get_mlflow_parameters_from_ckpt(trained_model_path)
trained_model_expt_name = params["cli_args/experiment_name"]
experiment_name = trained_model_expt_name + "_evaluation"

return experiment_name
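
A minimal usage sketch of the new helper, assuming a checkpoint logged by `train-detector` (the paths and values below are placeholders, not from this commit):
```python
from argparse import Namespace

from crabs.detector.utils.evaluate import get_mlflow_experiment_name_from_ckpt

# An empty --experiment_name falls back to the trained model's experiment name
args = Namespace(experiment_name="")
experiment_name = get_mlflow_experiment_name_from_ckpt(
    args=args,
    trained_model_path="ml-runs/<experiment-hash>/<run-hash>/checkpoints/last.ckpt",
)
# e.g. "Sept2023_evaluation" if the model was trained under the "Sept2023" experiment
```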
17 changes: 9 additions & 8 deletions crabs/detector/utils/visualization.py
@@ -154,7 +154,7 @@ def draw_detection(


def save_images_with_boxes(
test_dataloader: torch.utils.data.DataLoader,
dataloader: torch.utils.data.DataLoader,
trained_model: torch.nn.Module,
output_dir: str,
score_threshold: float,
@@ -163,12 +163,13 @@
Parameters
----------
test_dataloader : DataLoader
DataLoader for the test dataset.
dataloader : DataLoader
DataLoader with the images to save.
trained_model : torch.nn.Module
The trained object detection model.
output_dir : str
Directory to save the images with bounding boxes.
Path to directory to save the images with bounding boxes.
        A timestamp will be appended to the directory name.
score_threshold : float
Threshold for object detection.
@@ -186,14 +187,14 @@
trained_model.to(device)
trained_model.eval()

if not output_dir:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"results_{timestamp}"
# set output directory
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"{output_dir}_{timestamp}"
os.makedirs(output_dir, exist_ok=True)

with torch.no_grad():
imgs_id = 0
for imgs, annotations in test_dataloader:
for imgs, annotations in dataloader:
imgs_id += 1 # noqa: SIM113
imgs = list(img.to(device) for img in imgs)
