From 35321881799978d5f0e4bc5bd4155d7b632a479c Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Tue, 10 Dec 2024 14:08:10 +0000
Subject: [PATCH] Integration test for detect+track video (#242)

* Basic test passes

* Added tests for save frames and save video functionality

* Factor out test inputs as fixture

* Fix fixture name and parametrisation

* Rename integration test modules and refactor fixture

* Parametrise output_dir

* Clarify CLI help

* Remove home mocking commented out fixture

* Add caching to testing on CI

* Small additions

* Dummy commit to check if cache is shared

* Remove zip from GIN repo and pooch registry

* Fix tests by forcing download of all mlflow files

* Replace macos-13 for macos-12 as intel macos

* Revert "Replace macos-13 for macos-12 as intel macos"

This reverts commit ec69f8e141c8ec8a8f7dd38ebe8ca67a7c72977c.

* Make parametrisation of output_dir_name more explicit

* Remove output directory parametrisation (cover as a unit test instead)

* Fix deprecation warnings

* Revert "Remove zip from GIN repo and pooch registry"

This reverts commit 46daa327a837757cd7f2fbf3938a054d437e3d47.

* Fix docstring fixture

* Remove timestamp

* Add no timestamp flag parametrisation

* Mark integration tests as slow

* Skip slow tests in macos-13

* Fix optional type for py3.9

* Correctly pass tox parameters to pytest

* Fix not-equal
---
 .github/workflows/test_and_deploy.yml         |  21 ++-
 conftest.py                                   |   1 +
 pyproject.toml                                |   5 +-
 tests/fixtures/integration.py                 |  41 +++++
 tests/test_integration/test_annotations.py    |   2 +-
 .../test_integration/test_frame_extraction.py |   2 +-
 tests/test_integration/test_inference.py      | 167 ++++++++++++++++++
 7 files changed, 235 insertions(+), 4 deletions(-)
 create mode 100644 tests/fixtures/integration.py
 create mode 100644 tests/test_integration/test_inference.py

diff --git a/.github/workflows/test_and_deploy.yml b/.github/workflows/test_and_deploy.yml
index 6c8d3f01..291c9661 100644
--- a/.github/workflows/test_and_deploy.yml
+++ b/.github/workflows/test_and_deploy.yml
@@ -37,7 +37,26 @@ jobs:
           - os: macos-latest  # M1 macOS
             python-version: "3.10"
     steps:
-      - uses: neuroinformatics-unit/actions/test@v2
+      - name: Cache test data
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.crabs-exploration-test-data/*
+          key: cached-test-data
+          enableCrossOsArchive: true
+      - name: Run all tests except marked slow (if macos-13)
+        # slow tests are skipped here because macos-13 runners in CI
+        # are sometimes twice as slow as the others. See
+        # https://github.com/actions/runner-images/issues/3885#issuecomment-1848423579
+        if: matrix.os == 'macos-13'
+        uses: neuroinformatics-unit/actions/test@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+          secret-codecov-token: ${{ secrets.CODECOV_TOKEN }}
+          tox-args: "-- -m 'not slow'"
+      - name: Run all tests (if not macos-13)
+        if: matrix.os != 'macos-13'
+        uses: neuroinformatics-unit/actions/test@v2
         with:
           python-version: ${{ matrix.python-version }}
           secret-codecov-token: ${{ secrets.CODECOV_TOKEN }}
diff --git a/conftest.py b/conftest.py
index f762c948..5296bee6 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,5 +1,6 @@
 """Pytest configuration file."""
 
 pytest_plugins = [
+    "tests.fixtures.integration",
     "tests.fixtures.frame_extraction",
 ]
diff --git a/pyproject.toml b/pyproject.toml
index 300419d6..1ba630fa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,6 +46,8 @@ dev = [
     "ruff",
     "setuptools_scm",
     "check-manifest",
+    "pooch",
+    "tqdm",
     # "codespell",
     # "pandas-stubs",
     # "types-attrs",
@@ -146,5 +148,6 @@ python =
 extras =
     dev
 commands =
-    pytest -v --color=yes --cov=crabs --cov-report=xml
+    pytest -v --color=yes --cov=crabs --cov-report=xml {posargs}
 """
+# {posargs} forwards extra tox command-line arguments to pytest
diff --git a/tests/fixtures/integration.py b/tests/fixtures/integration.py
new file mode 100644
index 00000000..393928c3
--- /dev/null
+++ b/tests/fixtures/integration.py
@@ -0,0 +1,41 @@
+"""Pytest fixtures for integration tests."""
+
+from pathlib import Path
+
+import pooch
+import pytest
+
+GIN_TEST_DATA_REPO = "https://gin.g-node.org/SainsburyWellcomeCentre/crabs-exploration-test-data"
+
+
+@pytest.fixture(scope="session")
+def pooch_registry() -> pooch.Pooch:
+    """Pooch registry for the test data.
+
+    This fixture is common to the entire test session. The file
+    registry is retrieved once per session (a cached copy is reused).
+
+    Returns
+    -------
+    pooch.Pooch
+        Pooch instance for the GIN test data, with the file registry loaded
+
+    """
+    # Initialise pooch registry
+    registry = pooch.create(
+        Path.home() / ".crabs-exploration-test-data",
+        base_url=f"{GIN_TEST_DATA_REPO}/raw/master/test_data",
+    )
+
+    # Retrieve only the registry file from GIN. NOTE: known_hash=None
+    # skips hash checks, so an existing cached copy is not re-downloaded.
+    file_registry = pooch.retrieve(
+        url=f"{GIN_TEST_DATA_REPO}/raw/master/files-registry.txt",
+        known_hash=None,
+        path=Path.home() / ".crabs-exploration-test-data",
+    )
+
+    # Load registry file onto pooch registry
+    registry.load_registry(file_registry)
+
+    return registry
diff --git a/tests/test_integration/test_annotations.py b/tests/test_integration/test_annotations.py
index 6b0a8e34..b6bf3c47 100644
--- a/tests/test_integration/test_annotations.py
+++ b/tests/test_integration/test_annotations.py
@@ -400,7 +400,7 @@ def test_exclude_pattern(via_json_1: str, via_json_2: str, tmp_path: Path):
     # combine input json files, excluding those that end with _2.json
     json_out_fullpath = combine_multiple_via_jsons(
         [via_json_1, via_json_2],
-        exclude_pattern="\w+_2.json$",
+        exclude_pattern=r"\w+_2.json$",
         json_out_dir=str(tmp_path),
     )
 
diff --git a/tests/test_integration/test_frame_extraction.py b/tests/test_integration/test_frame_extraction.py
index c385ef15..976b5545 100644
--- a/tests/test_integration/test_frame_extraction.py
+++ b/tests/test_integration/test_frame_extraction.py
@@ -56,7 +56,7 @@ def assert_output_files(list_input_videos: list, cli_dict: dict) -> None:
 
     # check filename format of images: <video_name>_frame_{frame_idx:08d}
     list_regex_patterns = [
-        Path(input_video_str).stem + "_frame_[\d]{8}$"
+        Path(input_video_str).stem + r"_frame_[\d]{8}$"
         for input_video_str in list_input_videos
     ]
     for f in list_imgs:
diff --git a/tests/test_integration/test_inference.py b/tests/test_integration/test_inference.py
new file mode 100644
index 00000000..fe463737
--- /dev/null
+++ b/tests/test_integration/test_inference.py
@@ -0,0 +1,167 @@
+import re
+import subprocess
+from pathlib import Path
+from typing import Optional
+
+import cv2
+import pooch
+import pytest
+
+from crabs.tracker.utils.io import open_video
+
+
+@pytest.fixture()
+def input_data_paths(pooch_registry: pooch.Pooch):
+    """Input data for a detector+tracking run.
+
+    The data is fetched from the pooch registry.
+
+    Returns
+    -------
+    dict
+        Paths to the input video, annotations, tracking configuration
+        and trained model checkpoint, plus the video root name.
+
+    """
+    input_data_paths = {}
+    video_root_name = "04.09.2023-04-Right_RE_test_3_frames"
+    input_data_paths["video_root_name"] = video_root_name
+
+    # get trained model from pooch registry
+    # fetch ml-runs.zip and unzip it; the extracted file paths are returned
+    list_files_ml_runs = pooch_registry.fetch(
+        "ml-runs.zip",
+        processor=pooch.Unzip(extract_dir=""),
+        progressbar=True,
+    )
+    # pick the first extracted file whose path ends in "last.ckpt"
+    input_data_paths["ckpt"] = next(
+        x for x in list_files_ml_runs if x.endswith("last.ckpt")
+    )
+
+    # get input video, annotations and config from registry
+    map_key_to_filepath = {
+        "video": f"{video_root_name}/{video_root_name}.mp4",
+        "annotations": f"{video_root_name}/{video_root_name}_ground_truth.csv",
+        "tracking_config": f"{video_root_name}/tracking_config.yaml",
+    }
+    for key, filepath in map_key_to_filepath.items():
+        input_data_paths[key] = pooch_registry.fetch(filepath)
+
+    return input_data_paths
+
+
+# mark integration test as slow
+@pytest.mark.slow
+@pytest.mark.parametrize(
+    "no_timestamp_flag",
+    [
+        None,
+        "--output_dir_no_timestamp",
+    ],
+)
+@pytest.mark.parametrize(
+    "flags_to_append",
+    [
+        [],
+        ["--save_video"],
+        ["--save_frames"],
+        ["--save_video", "--save_frames"],
+    ],
+)
+def test_detect_and_track_video(
+    input_data_paths: dict,
+    tmp_path: Path,
+    flags_to_append: list,
+    no_timestamp_flag: Optional[str],
+):
+    """Test the detect-and-track-video entry point when groundtruth is passed.
+
+    The test checks:
+    - the exit code of the detect-and-track-video command
+    - the name of the output directory (with or without timestamp)
+    - the existence of csv files with predictions and tracking metrics
+    - the existence of video file if requested
+    - the existence of exported frames if requested
+
+    """
+    # Define main detect-and-track-video command
+    main_command = [
+        "detect-and-track-video",
+        f"--trained_model_path={input_data_paths['ckpt']}",
+        f"--video_path={input_data_paths['video']}",
+        f"--config_file={input_data_paths['tracking_config']}",
+        f"--annotations_file={input_data_paths['annotations']}",
+        "--accelerator=cpu",
+    ]
+    # append required flags
+    main_command.extend(flags_to_append)
+    if no_timestamp_flag:
+        main_command.append(no_timestamp_flag)
+
+    # run command
+    completed_process = subprocess.run(
+        main_command,
+        check=True,
+        cwd=tmp_path,
+        # set cwd to Pytest's temporary directory
+        # so the output is saved there
+    )
+
+    # check the command runs successfully
+    assert completed_process.returncode == 0
+
+    # check a single tracking output directory exists, with expected name
+    output_dir_name_expected = "tracking_output"
+    if no_timestamp_flag:
+        expected_pattern = re.compile(rf"{output_dir_name_expected}$")
+    else:
+        expected_pattern = re.compile(
+            rf"{output_dir_name_expected}_\d{{8}}_\d{{6}}$"
+        )
+    list_cwd_subdirs = [x for x in tmp_path.iterdir() if x.is_dir()]
+    assert len(list_cwd_subdirs) == 1
+    tracking_output_dir = list_cwd_subdirs[0]
+    assert expected_pattern.match(tracking_output_dir.name)
+
+    # check csv with predictions exists
+    predictions_csv = (
+        tmp_path
+        / tracking_output_dir
+        / f"{input_data_paths['video_root_name']}_tracks.csv"
+    )
+    assert (predictions_csv).exists()
+
+    # check csv with tracking metrics exists
+    tracking_metrics_csv = (
+        tmp_path / tracking_output_dir / "tracking_metrics_output.csv"
+    )
+    assert (tracking_metrics_csv).exists()
+
+    # if the video is requested: check it exists
+    if "--save_video" in flags_to_append:
+        assert (
+            tmp_path
+            / tracking_output_dir
+            / f"{input_data_paths['video_root_name']}_tracks.mp4"
+        ).exists()
+
+    # if the frames are requested: check they exist
+    if "--save_frames" in flags_to_append:
+        input_video_object = open_video(input_data_paths["video"])
+        total_n_frames = int(input_video_object.get(cv2.CAP_PROP_FRAME_COUNT))
+        input_video_object.release()  # free the capture handle once read
+
+        # check frames subdirectory exists
+        frames_subdir = (
+            tmp_path
+            / tracking_output_dir
+            / f"{input_data_paths['video_root_name']}_frames"
+        )
+        assert frames_subdir.exists()
+
+        # check files are named frame_<8 digits>.png (escaped dot, anchored)
+        expected_pattern = re.compile(r"frame_\d{8}\.png$")
+        list_files = [x for x in frames_subdir.iterdir() if x.is_file()]
+        assert len(list_files) == total_n_frames
+        assert all(expected_pattern.match(x.name) for x in list_files)