Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add MLflow logging and use .predict method from Lightning #238

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions crabs/detector/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,3 +243,37 @@ def configure_optimizers(self) -> dict[str, torch.optim.Optimizer]:
weight_decay=self.config["wdecay"],
)
return {"optimizer": optimizer}

# def predict_step(
# self, images: torch.Tensor, dataloader_idx=0
# ) -> torch.Tensor:
# """Perform inference on a batch of images."""

# # ensure batch is on device
# # batch = batch.to(self.device)
# # torch.Size([2160, 4096, 3])

# # apply inference transform
# # test_val_transforms = CrabsDataModule._get_test_val_transform(None)
# # inference_transforms = transforms.Compose(
# # [
# # transforms.ToImage(),
# # transforms.ToDtype(torch.float32, scale=True),
# # ]
# # )
# # batch_transformed = inference_transforms(batch) # B, C, H, W
# # breakpoint()

# # # run prediction
# # predictions = self.model(images)
# transform = transforms.Compose(
# [
# transforms.ToImage(),
# transforms.ToDtype(torch.float32, scale=True),
# ]
# )
# image_tensors = transform(images).to(self.device)
# # image_tensors = image_tensors.unsqueeze(0)
# with torch.no_grad():
# predictions = self.model(image_tensors)
# return predictions
66 changes: 66 additions & 0 deletions crabs/tracker/track_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,18 @@
from pathlib import Path

import cv2
import lightning
import numpy as np
import torch
import torchvision.transforms.v2 as transforms
import yaml # type: ignore

from crabs.detector.models import FasterRCNN
from crabs.detector.utils.detection import (
log_mlflow_metadata_as_info,
set_mlflow_run_name,
setup_mlflow_logger,
)
from crabs.tracker.evaluate_tracker import TrackerEvaluate
from crabs.tracker.sort import Sort
from crabs.tracker.utils.io import (
Expand Down Expand Up @@ -61,6 +67,41 @@ def __init__(self, args: argparse.Namespace) -> None:
iou_threshold=self.config["iou_threshold"],
)

# MLflow experiment name and run name
self.experiment_name = args.experiment_name
self.run_name = set_mlflow_run_name()
self.mlflow_folder = args.mlflow_folder

# Log MLflow information to screen
log_mlflow_metadata_as_info(self)

def setup_trainer(self):
    """Build a Lightning trainer wired to an MLflow logger.

    The logger is created via ``setup_mlflow_logger`` from this object's
    experiment name, run name, MLflow folder and CLI arguments. Details
    of the trained model (experiment name, run name and checkpoint file
    name) are then recorded on it as hyperparameters under the
    ``trained_model/`` prefix.

    Returns
    -------
    lightning.Trainer
        A trainer using ``self.accelerator`` and the MLflow logger.

    """
    # Create the logger first: the hyperparameters below are logged on it
    logger = setup_mlflow_logger(
        experiment_name=self.experiment_name,
        run_name=self.run_name,
        mlflow_folder=self.mlflow_folder,
        cli_args=self.args,
    )

    # Only the checkpoint file name is recorded, not its full path
    ckpt_filename = Path(self.trained_model_path).name
    trained_model_params = {
        "trained_model/experiment_name": self.trained_model_expt_name,
        "trained_model/run_name": self.trained_model_run_name,
        "trained_model/ckpt_file": ckpt_filename,
    }
    logger.log_hyperparams(trained_model_params)

    # TODO: decide whether other attributes set in __init__ should be
    # logged here as well

    trainer = lightning.Trainer(
        accelerator=self.accelerator,  # lightning accelerators
        logger=logger,
    )
    return trainer

def setup(self):
"""Load tracking config, trained model and input video path."""
with open(self.config_file) as f:
Expand Down Expand Up @@ -229,6 +270,12 @@ def run_tracking(self):
if self.args.save_frames:
close_csv_file(self.csv_file)

# if this is a slurm job: add slurm logs as artifacts
# slurm_job_id = os.environ.get("SLURM_JOB_ID")
# slurm_job_name = os.environ.get("SLURM_JOB_NAME")
# if slurm_job_id and (slurm_job_name != "bash"):
# slurm_logs_as_artifacts(trainer.logger, slurm_job_id)


def main(args) -> None:
"""Run detection+tracking inference on video.
Expand Down Expand Up @@ -319,6 +366,25 @@ def tracking_parse_args(args):
"Valid inputs are: cpu or gpu. Default: gpu."
),
)
parser.add_argument(
"--experiment_name",
type=str,
default="Inference",
help=(
"Name of the experiment in MLflow, under which the current run "
"will be logged. "
"By default: Inference."
),
)
parser.add_argument(
"--mlflow_folder",
type=str,
default="./ml-runs",
help=(
"Path to MLflow directory where to log the evaluation data. "
"Default: 'ml-runs' directory under the current working directory."
),
)
parser.add_argument(
"--max_frames_to_read",
type=int,
Expand Down
2 changes: 2 additions & 0 deletions tests/test_unit/test_track_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ def mock_args():
accelerator="gpu",
annotations_file=None,
save_video=None,
experiment_name=None,
mlflow_folder=Path(temp_dir) / "ml-runs",
)


Expand Down