SainsburyWellcomeCentre · nikk-nikaznan · Jul 9, 2024 · Jun 5, 2024 · Jun 5, 2024 · Jun 5, 2024
diff --git a/crabs/tracker/evaluate_tracker.py b/crabs/tracker/evaluate_tracker.py
@@ -1,38 +1,57 @@
 import csv
 import logging
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Tuple
 
 import numpy as np
 
 from crabs.tracker.utils.tracking import extract_bounding_box_info
 
 
 class TrackerEvaluate:
-    def __init__(self, gt_dir: str, tracked_list: list, iou_threshold: float):
+    def __init__(
+        self, gt_dir: str, tracked_list: list[np.ndarray], iou_threshold: float
+    ):
+        """
+        Initialize the TrackerEvaluate class with ground truth directory, tracked list, and IoU threshold.
+
+        Parameters
+        ----------
+        gt_dir : str
+            Path to the ground truth CSV file (opened directly as a file).
+        tracked_list : list[np.ndarray]
+            A list where each element is a numpy array representing tracked objects in a frame.
+            Each numpy array has shape (N, 5), where N is the number of objects.
+            The columns are [x1, y1, x2, y2, id], where (x1, y1) and (x2, y2)
+            define the bounding box and id is the object ID.
+        iou_threshold : float
+            Intersection over Union (IoU) threshold for evaluating tracking performance.
+        """
         self.gt_dir = gt_dir
         self.tracked_list = tracked_list
         self.iou_threshold = iou_threshold
 
-    def create_gt_list(
-        self,
-        ground_truth_data: list[Dict[str, Any]],
-        gt_boxes_list: list[np.ndarray],
-    ) -> list[np.ndarray]:
+    def get_ground_truth_data(self) -> Dict[int, Dict[str, Any]]:
         """
-        Creates a list of ground truth bounding boxes organized by frame number.
-
-        Parameters
-        ----------
-        ground_truth_data : list[Dict[str, Any]]
-            A list containing ground truth bounding box data organized by frame number.
-        gt_boxes_list : list[np.ndarray]
-            A list to store the ground truth bounding boxes for each frame.
+        Extract ground truth bounding box data from a CSV file and organize it by frame number.
 
         Returns
         -------
-        list[np.ndarray]:
-            A list containing ground truth bounding boxes organized by frame number.
+        Dict[int, Dict[str, Any]]:
+            A dictionary where the key is the frame number and the value is another dictionary containing:
+            - 'bbox': A list of numpy arrays with coordinates of the bounding box [x, y, x + width, y + height]
+            - 'id': A list of ground truth IDs, one per bounding box in 'bbox'
         """
+        ground_truth_data = []
+
+        with open(self.gt_dir, "r") as csvfile:
+            csvreader = csv.reader(csvfile)
+            next(csvreader)  # Skip the header row
+            ground_truth_data = [
+                extract_bounding_box_info(row) for row in csvreader
+            ]
+
+        # Format as a dictionary with key = frame number
+        ground_truth_dict: dict = {}
         for data in ground_truth_data:
             frame_number = data["frame_number"]
             bbox = np.array(
@@ -41,53 +60,18 @@ def create_gt_list(
                     data["y"],
                     data["x"] + data["width"],
                     data["y"] + data["height"],
-                    data["id"],
                 ],
                 dtype=np.float32,
             )
-            if gt_boxes_list[frame_number].size == 0:
-                gt_boxes_list[frame_number] = bbox.reshape(
-                    1, -1
-                )  # Initialize as a 2D array
-            else:
-                gt_boxes_list[frame_number] = np.vstack(
-                    [gt_boxes_list[frame_number], bbox]
-                )
-        return gt_boxes_list
+            track_id = int(float(data["id"]))
 
-    def get_ground_truth_data(self) -> list[np.ndarray]:
-        """
-        Extract ground truth bounding box data from a CSV file.
+            if frame_number not in ground_truth_dict:
+                ground_truth_dict[frame_number] = {"bbox": [], "id": []}
 
-        Parameters
-        ----------
-        gt_dir : str
-            The path to the CSV file containing ground truth data.
-
-        Returns
-        -------
-        list[np.ndarray]:
-            A list containing ground truth bounding box data organized by frame number.
-            The numpy array represent the coordinates and ID of the bounding box in the order:
-            x, y, x + width, y + height, ID
-        """
-        ground_truth_data = []
-        max_frame_number = 0
-
-        # Open the CSV file and read its contents line by line
-        with open(self.gt_dir, "r") as csvfile:
-            csvreader = csv.reader(csvfile)
-            next(csvreader)  # Skip the header row
-            for row in csvreader:
-                data = extract_bounding_box_info(row)
-                ground_truth_data.append(data)
-                max_frame_number = max(max_frame_number, data["frame_number"])
-
-        # Initialize a list to store the ground truth bounding boxes for each frame
-        gt_boxes_list = [np.array([]) for _ in range(max_frame_number + 1)]
+            ground_truth_dict[frame_number]["bbox"].append(bbox)
+            ground_truth_dict[frame_number]["id"].append(track_id)
 
-        gt_boxes_list = self.create_gt_list(ground_truth_data, gt_boxes_list)
-        return gt_boxes_list
+        return ground_truth_dict
 
     def calculate_iou(self, box1: np.ndarray, box2: np.ndarray) -> float:
         """
@@ -131,46 +115,53 @@ def calculate_iou(self, box1: np.ndarray, box2: np.ndarray) -> float:
 
     def count_identity_switches(
         self,
-        prev_frame_ids: Optional[list[list[int]]],
-        current_frame_ids: Optional[list[list[int]]],
+        prev_frame_id_map: Optional[Dict[int, int]],
+        current_frame_id_map: Dict[int, int],
     ) -> int:
         """
         Count the number of identity switches between two sets of object IDs.
 
         Parameters
         ----------
-        prev_frame_ids : Optional[list[list[int]]]
-            List of object IDs in the previous frame.
-        current_frame_ids : Optional[list[list[int]]]
-            List of object IDs in the current frame.
+        prev_frame_id_map : Optional[Dict[int, int]]
+            A dictionary mapping ground truth IDs to predicted IDs from the previous frame.
+        current_frame_id_map : Dict[int, int]
+            A dictionary mapping ground truth IDs to predicted IDs for the current frame.
+
 
         Returns
         -------
         int
             The number of identity switches between the two sets of object IDs.
         """
 
-        if prev_frame_ids is None or current_frame_ids is None:
+        if prev_frame_id_map is None:
             return 0
 
-        # Initialize count of identity switches
-        num_switches = 0
+        prev_frame_gt_id_map = {v: k for k, v in prev_frame_id_map.items()}
 
-        prev_ids = set(prev_frame_ids[0])
-        current_ids = set(current_frame_ids[0])
+        switch_count = 0
 
-        # Calculate the number of switches by finding the difference in IDs
-        num_switches = len(prev_ids.symmetric_difference(current_ids))
+        for current_gt_id, current_tracked_id in current_frame_id_map.items():
+            prev_tracked_id = prev_frame_id_map.get(current_gt_id)
+            prev_gt_id = prev_frame_gt_id_map.get(current_tracked_id)
+            if prev_tracked_id is not None:
+                if prev_tracked_id != current_tracked_id:
+                    switch_count += 1
+            elif prev_gt_id is not None:
+                if current_gt_id != prev_gt_id:
+                    switch_count += 1
 
-        return num_switches
+        return switch_count
 
     def evaluate_mota(
         self,
         gt_boxes: np.ndarray,
+        gt_ids: np.ndarray,
         tracked_boxes: np.ndarray,
         iou_threshold: float,
-        prev_frame_ids: Optional[list[list[int]]],
-    ) -> float:
+        prev_frame_id_map: Optional[Dict[int, int]],
+    ) -> Tuple[float, Dict[int, int]]:
         """
         Evaluate MOTA (Multiple Object Tracking Accuracy).
 
@@ -179,18 +170,22 @@ def evaluate_mota(
         Parameters
         ----------
         gt_boxes : np.ndarray
-            Ground truth bounding boxes of objects.
+            Ground truth bounding boxes of objects with shape of (N, 4) with (x1, y1, x2, y2).
+        gt_ids : np.ndarray
+            Ground truth IDs corresponding to the bounding boxes, with shape (N,).
         tracked_boxes : np.ndarray
-            Tracked bounding boxes of objects.
+            Tracked bounding boxes of objects with shape (M, 5) with (x1, y1, x2, y2, id); M may differ from N.
         iou_threshold : float
             Intersection over Union (IoU) threshold for considering a match.
-        prev_frame_ids : Optional[list[list[int]]]
-            IDs from the previous frame for identity switch detection.
+        prev_frame_id_map : Optional[Dict[int, int]]
+            A dictionary mapping ground truth IDs to predicted IDs from the previous frame.
 
         Returns
         -------
         float
             The computed MOTA (Multi-Object Tracking Accuracy) score for the tracking performance.
+        Dict[int, int]
+            A dictionary mapping ground truth IDs to predicted IDs for the current frame.
 
         Notes
         -----
@@ -203,81 +198,92 @@ def evaluate_mota(
         - Identity Switches: Instances where the tracking algorithm assigns a different ID to an object compared to its ID in the previous frame.
         - Total Ground Truth: The total number of ground truth objects in the scene.
 
-        The MOTA score ranges from 0 to 1, with higher values indicating better tracking performance.
+        The MOTA score ranges from -inf to 1, with higher values indicating better tracking performance.
         A MOTA score of 1 indicates perfect tracking, where there are no missed detections, false positives, or identity switches.
         """
         total_gt = len(gt_boxes)
         false_positive = 0
+        matched_gt_boxes = set()
+        gt_to_tracked_map = {}
 
         for i, tracked_box in enumerate(tracked_boxes):
             best_iou = 0.0
             best_match = None
 
             for j, gt_box in enumerate(gt_boxes):
-                iou = self.calculate_iou(gt_box[:4], tracked_box[:4])
-                if iou > iou_threshold and iou > best_iou:
-                    best_iou = iou
-                    best_match = j
+                if j not in matched_gt_boxes:
+                    iou = self.calculate_iou(gt_box[:4], tracked_box[:4])
+                    if iou > iou_threshold and iou > best_iou:
+                        best_iou = iou
+                        best_match = j
+
             if best_match is not None:
                 # successfully found a matching ground truth box for the tracked box.
-                # set the corresponding ground truth box to None.
-                gt_boxes[best_match] = None
+                matched_gt_boxes.add(best_match)
+                # Map ground truth ID to tracked ID
+                gt_to_tracked_map[int(gt_ids[best_match])] = int(
+                    tracked_box[-1]
+                )
             else:
                 false_positive += 1
 
-        missed_detections = 0
-        for box in gt_boxes:
-            if box is not None and not np.all(np.isnan(box)):
-                # if true ground truth box was not matched with any tracked box
-                missed_detections += 1
-
-        tracked_ids = [[box[-1] for box in tracked_boxes]]
+        missed_detections = total_gt - len(matched_gt_boxes)
 
         num_switches = self.count_identity_switches(
-            prev_frame_ids, tracked_ids
+            prev_frame_id_map, gt_to_tracked_map
         )
 
         mota = (
             1 - (missed_detections + false_positive + num_switches) / total_gt
         )
-        return mota
+        return mota, gt_to_tracked_map
 
-    def evaluate_tracking(self, gt_boxes_list: list) -> list[float]:
+    def evaluate_tracking(
+        self,
+        ground_truth_dict: Dict[int, Dict[str, Any]],
+    ) -> list[float]:
         """
         Evaluate tracking performance using the Multi-Object Tracking Accuracy (MOTA) metric.
 
         Parameters
         ----------
-        gt_boxes_list : list[list[float]]
-            List of ground truth bounding boxes for each frame.
-        tracked_boxes_list : list[list[float]]
-            List of tracked bounding boxes for each frame.
+        ground_truth_dict : dict
+            Dictionary containing ground truth bounding boxes and IDs for each frame, organized by frame number.
 
         Returns
         -------
         list[float]:
             The computed MOTA (Multi-Object Tracking Accuracy) score for the tracking performance.
         """
         mota_values = []
-        prev_frame_ids: Optional[list[list[int]]] = None
-        for gt_boxes, tracked_boxes in zip(gt_boxes_list, self.tracked_list):
-            mota = self.evaluate_mota(
-                gt_boxes,
-                tracked_boxes,
-                self.iou_threshold,
-                prev_frame_ids,
+        prev_frame_id_map: Optional[dict] = None
+
+        for frame_number in sorted(ground_truth_dict.keys()):
+            gt_data = ground_truth_dict[frame_number]
+            gt_boxes = np.array(
+                [[x1, y1, x2, y2] for x1, y1, x2, y2 in gt_data["bbox"]],
+                dtype=np.float32,
             )
-            mota_values.append(mota)
-            # Update previous frame IDs for the next iteration
-            prev_frame_ids = [[box[-1] for box in tracked_boxes]]
+            gt_ids = np.array(gt_data["id"], dtype=np.float32)
+
+            if frame_number < len(self.tracked_list):
+                tracked_boxes = self.tracked_list[frame_number]
+                mota, prev_frame_id_map = self.evaluate_mota(
+                    gt_boxes,
+                    gt_ids,
+                    tracked_boxes,
+                    self.iou_threshold,
+                    prev_frame_id_map,
+                )
+                mota_values.append(mota)
 
         return mota_values
 
     def run_evaluation(self) -> None:
         """
         Run evaluation of tracking based on tracking ground truth.
         """
-        gt_boxes_list = self.get_ground_truth_data()
-        mota_values = self.evaluate_tracking(gt_boxes_list)
+        ground_truth_dict = self.get_ground_truth_data()
+        mota_values = self.evaluate_tracking(ground_truth_dict)
         overall_mota = np.mean(mota_values)
         logging.info("Overall MOTA: %f" % overall_mota)