changes on some of the reviews, tbc

SainsburyWellcomeCentre · Aug 1, 2023 · b86ebcb · b86ebcb
1 parent ce42126
commit b86ebcb
Show file tree

Hide file tree

Showing 2 changed files with 90 additions and 41 deletions.
diff --git a/bboxes labelling/additional_channels_extraction.py b/bboxes labelling/additional_channels_extraction.py
@@ -1,43 +1,44 @@
-# based on https://github.com/visipedia/caltech-fish-counting
-
 import os
 import argparse
-import json
 import cv2
 import numpy as np
 from PIL import Image
 from pathlib import Path
 
+from utils import read_json_file
+
+
+def compute_stacked_inputs(args) -> None:
+    """
+    Function to compute grayscale, background subtracted and motion signal frame based
 
-def read_json_file(file_path):
-    try:
-        with open(file_path, "r") as file:
-            data = json.load(file)
-        return data
-    except FileNotFoundError:
-        print(f"File not found: {file_path}")
-        return None
-    except json.JSONDecodeError:
-        print(f"Error decoding JSON data from file: {file_path}")
-        return None
+    Args:
+        args (argparse.Namespace): An object containing the parsed command-line arguments.
 
+    Returns:
+        None
+    
+    References:
+        https://github.com/visipedia/caltech-fish-counting
 
-def get_frames(args):
+    """
+
     frame_dict = read_json_file(args.json_path)
 
     # Set batch size (number of frames per batch)
     batch_size = 1000
 
-    for vid_file, frame_idx in frame_dict.items():
+    for vid_file, list_frame_indices in frame_dict.items():
         if not os.path.exists(vid_file):
             print(f"Video path not found: {vid_file}")
+            print(f"Skipped video {vid_file}")
             continue
 
         cap = cv2.VideoCapture(vid_file)
         n_frame = 0
 
         while True:
-            frames = []
+            frame_data = []
             for _ in range(batch_size):
                 ret, frame = cap.read()
 
@@ -46,32 +47,37 @@ def get_frames(args):
                     break
 
                 frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-                # Gaussian blurring
-                blurred_frames = cv2.GaussianBlur(frame, (5, 5), 0)
-                frames.append(blurred_frames)
+
+                frame_data.append(frame)
                 n_frame += 1
-
-            if not frames:
+                
+            if not frame_data:
                 break
-            blurred_frames = np.stack(frames).astype(np.float32)
-            # print(blurred_frames.shape)
+
+            frames = np.stack(frame_data)
+
+            # # Gaussian blurring
+            blurred_frames = frames.astype(np.float32)
+
+            for i in range(frames.shape[0]):
+                blurred_frames[i] = cv2.GaussianBlur(
+                    blurred_frames[i],
+                    args.kernel_size,
+                    args.sigmax
+                )
 
             # average of all the frames after blurring
-            mean_blurred_frame = blurred_frames.mean(axis=0)
             # mean subtraction -- remove the overall brightness and
             # contrast differences caused by variations in the original frames
-            blurred_frames -= mean_blurred_frame
             # normalised the frame
-            mean_normalization_value = np.max(np.abs(blurred_frames))
-            blurred_frames /= mean_normalization_value
-            blurred_frames += 1
-            blurred_frames /= 2
-            delta = 1
+            blurred_frames_mean = blurred_frames.mean(axis=0)
+            norm_factor = np.max(np.abs(blurred_frames))
+            background_subtraction = ((blurred_frames - blurred_frames_mean) / norm_factor + 1) / 2
 
             # detecting motion by finding the differences between frame
             # set the delta : frame[i+delta] - frame[i]
-            for i, frame_offset in enumerate(range(len(frames) - delta)):
-                if (i + (n_frame - batch_size)) in frame_idx:
+            for i, frame_offset in enumerate(range(len(frames) - args.delta)):
+                if (i + (n_frame - batch_size)) in list_frame_indices:
                     file_name = (
                         f"{Path(vid_file).parent.stem}_"
                         f"{Path(vid_file).stem}_"
@@ -80,9 +86,9 @@ def get_frames(args):
                     frame_image = np.dstack(
                         [
                             frames[i] / 255,  # grayscale original frame
-                            blurred_frames[i],  # foreground mask
+                            background_subtraction[i],  # foreground mask
                             np.abs(
-                                blurred_frames[i + delta] - blurred_frames[i]
+                                background_subtraction[i + args.delta] - background_subtraction[i]
                             ),  # motion mask
                         ]
                     ).astype(np.float32)
@@ -93,22 +99,52 @@ def get_frames(args):
         cap.release()
 
 
-def argument_parser():
+def argument_parser() -> argparse.Namespace:
+    """
+    Parse command-line arguments for your script.
+
+    Returns:
+        argparse.Namespace: An object containing the parsed command-line arguments.
+                            The attributes of this object correspond to the defined
+                            command-line arguments in your script.
+    """
+
     parser = argparse.ArgumentParser()
     parser.add_argument(
         "--json_path",
-        default="Data/train_data/extracted_frames.json",
-        help="Location of json file with frame_idx.",
+        type=str,
+        required=True,
+        help="Location of json file with list frame indices.",
     )
     parser.add_argument(
         "--out_dir",
-        default="Data/train_data/bg_sub/",
+        type=str,
+        required=True,
         help="Output location for converted frames.",
     )
-    return parser
+    parser.add_argument(
+        '--kernel_size', 
+        nargs=2, 
+        type=int, 
+        default=[5, 5],
+        help='Kernel size for the Gaussian blur (default: 5 5)'
+    )
+    parser.add_argument(
+        '--sigmax', 
+        type=int, 
+        default=0,
+        help='Standard deviation in the X direction of the Gaussian kernel'
+    )
+    parser.add_argument(
+        '--delta', 
+        type=int, 
+        default=10,
+        help='The value how many frame differences we compute'
+    )
 
+    return parser
 
 if __name__ == "__main__":
     args = argument_parser().parse_args()
 
-    get_frames(args)
+    compute_stacked_inputs(args)
diff --git a/bboxes labelling/utils.py b/bboxes labelling/utils.py
@@ -0,0 +1,13 @@
+import json
+
+def read_json_file(file_path):
+    try:
+        with open(file_path, "r") as file:
+            data = json.load(file)
+        return data
+    except FileNotFoundError:
+        print(f"File not found: {file_path}")
+        return None
+    except json.JSONDecodeError:
+        print(f"Error decoding JSON data from file: {file_path}")
+        return None