Skip to content

Commit

Permalink
Merge pull request #72 from SainsburyWellcomeCentre/smg/combine-annot…
Browse files Browse the repository at this point in the history
…ations

Utils for combining annotations
  • Loading branch information
nikk-nikaznan authored Oct 31, 2023
2 parents f7fe954 + 19dc74f commit 99e1fd1
Show file tree
Hide file tree
Showing 14 changed files with 85,721 additions and 227 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@
/pose estimation/animal_size_4k/*
/pose estimation/top_down_model_4k_230713/*

/tests/output/
# include mp4 files in tests/data dir
!/tests/data/*.mp4
!/tests/data/**/*.mp4
!/tests/data/**/*.json
3 changes: 2 additions & 1 deletion bboxes_labelling/additional_channels_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import cv2
import numpy as np
from PIL import Image
from utils import read_json_file

from bboxes_labelling.annotations_utils import read_json_file


def apply_grayscale_and_blur(
Expand Down
244 changes: 244 additions & 0 deletions bboxes_labelling/annotations_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
import json
import os
from pathlib import Path
from typing import Any, Optional


def read_json_file(
file_path: str,
) -> dict:
"""
Read JSON file as dict.
Parameters
----------
file_path : str
Path to the JSON file
Returns
-------
Optional[dict]
Dictionary with the JSON data
"""
try:
with open(file_path) as file:
return json.load(file)
except FileNotFoundError as not_found_error:
msg = f"File not found: {file_path}"
raise ValueError(msg) from not_found_error

except json.JSONDecodeError as decode_error:
msg = f"Error decoding JSON data from file: {file_path}"
raise ValueError(msg) from decode_error


def combine_multiple_via_jsons(
list_json_files: list,
json_out_filename: str = "VIA_JSON_combined.json",
json_out_dir: Optional[str] = None,
via_default_dir: Optional[str] = None,
via_project_name: Optional[str] = None,
) -> str:
"""
Combine all the input VIA JSON files into one.
A VIA JSON file is a json file specific to the VIA tool
that defines the annotations and also the visualisation settings
for the tool.
Some attributes of the combined VIA JSON file are taken from
the first input VIA JSON file:
- _via_settings,
- _via_attributes, and
- _via_data_format_version
Parameters
----------
list_json_files : list
list of paths to VIA JSON files
json_out_filename : str, optional
name of the combined VIA JSON file, by default "VIA_JSON_combined.json"
json_out_dir : Optional[str], optional
parent directory to the combined VIA JSON file. If None, the parent
directory of the first VIA JSON file in list_json_files is used.
via_default_dir : Optional[str], optional
The default directory in which to look for images for the VIA project.
If None, the value specified in the first VIA JSON file is used.
A full path is required.
via_project_name : Optional[str], optional
The name of the VIA project.
If None, the value specified in the first VIA JSON file is used.
Returns
-------
json_out_fullpath: str
full path to the combined VIA JSON file
"""
# Initialise data structures for the combined VIA JSON file
via_data_combined = {}
dict_of_via_img_metadata = {}
list_of_via_img_id_list = []

# loop through the input VIA JSON files
for k, js_path in enumerate(list_json_files):
# open VIA JSON file
via_data = read_json_file(js_path)

# take some attributes from the first VIA JSON file
if k == 0:
via_data_combined["_via_settings"] = via_data["_via_settings"]
via_data_combined["_via_attributes"] = via_data["_via_attributes"]
via_data_combined["_via_data_format_version"] = via_data[
"_via_data_format_version"
]

# append dictionary of the images metadata (contains annotations)
dict_of_via_img_metadata.update(via_data["_via_img_metadata"])

# append list of images' IDs
list_of_via_img_id_list.extend(via_data["_via_image_id_list"])

# add data to combined VIA dictionary
via_data_combined["_via_img_metadata"] = dict_of_via_img_metadata
via_data_combined["_via_image_id_list"] = list_of_via_img_id_list

# If required: change _via_settings > core > default_filepath
if via_default_dir:
# check if trailing slash is present in the directory path
# and add it if not
if not via_default_dir.endswith(os.sep):
via_default_dir = f"{via_default_dir}{os.sep}"
# check path is a full path
if Path(via_default_dir) != Path(via_default_dir).resolve():
msg = "Default VIA directory is not a fullpath"
raise ValueError(msg)

# assign directory path to the VIA combined dictionary
via_data_combined["_via_settings"]["core"]["default_filepath"] = via_default_dir

# If required: change _via_settings > project > name
if via_project_name:
via_data_combined["_via_settings"]["project"]["name"] = via_project_name

# Save the VIA combined data as a new JSON file
# if no output directory is passed, use the parent directory
# of the first VIA JSON file in the list
if not json_out_dir:
json_out_dir = str(Path(list_json_files[0]).parent)
json_out_fullpath = Path(json_out_dir) / json_out_filename

with open(json_out_fullpath, "w") as combined_file:
json.dump(via_data_combined, combined_file)

return str(json_out_fullpath)


def convert_via_json_to_coco(
json_file_path: str,
coco_category_ID: int = 1,
coco_category_name: str = "crab",
coco_supercategory_name: str = "animal",
coco_out_filename: Optional[str] = None,
coco_out_dir: Optional[str] = None,
) -> str:
"""
Convert annotation data for one category from VIA-JSON format to COCO.
This function takes annotation data in a VIA JSON format and converts it
into COCO format, which is widely used for object detection datasets.
It assumes that:
- the input JSON data has a specific structure (VIA-format) that includes
image metadata and regions of interest,
- that all annotations are of the same COCO category, and
- that 'iscrowd' = 0 for all annotations.
Parameters
----------
json_file_path : str
Path to the VIA-JSON file containing the annotation data.
coco_category_ID : int, optional
category ID of all the annotations
coco_category_name : str, optional
category name of all the annotations
coco_supercategory_name : str, optional
supercategory for all the annotations
coco_out_filename : str, optional
Name of the COCO output file. If None (default), the input VIA JSON
filename is used with the suffix '_coco_gen'
coco_out_dir : str, optional
Name of the output directory where to store the COCO file.
If None (default), the file is saved at the same location as the
input VIA JSON file.
Returns
-------
str
path to the COCO json file. By default, the file
"""
# Load the annotation data in VIA JSON format
with open(json_file_path) as json_file:
annotation_data = json.load(json_file)

# Create data structure for COCO format
coco_categories = [
{
"id": coco_category_ID,
"name": coco_category_name,
"supercategory": coco_supercategory_name,
},
]
coco_data: dict[str, Any] = {
"info": {},
"licenses": [],
"categories": coco_categories,
"images": [],
"annotations": [],
}

# Iterate through each image and annotation from VIA JSON
image_id = 1
annotation_id = 1
for image_info in annotation_data["_via_img_metadata"].values():
image_data = {
"id": image_id,
"width": 0, # TODO: find how we can get this (not available in JSON)
"height": 0, # TODO: find how we can get this (not available in JSON)
"file_name": image_info["filename"],
}
coco_data["images"].append(image_data)

for region in image_info["regions"]:
x, y, width, height = (
region["shape_attributes"]["x"],
region["shape_attributes"]["y"],
region["shape_attributes"]["width"],
region["shape_attributes"]["height"],
)

annotation_data = {
"id": annotation_id,
"image_id": image_id,
"category_id": coco_category_ID,
"bbox": [x, y, width, height],
"area": width * height,
"iscrowd": 0,
}
coco_data["annotations"].append(annotation_data)
annotation_id += 1

# update image_id
image_id += 1

# Export the annotation data in COCO format to a JSON file
# if no filename provided: use VIA JSON filename + "_coco_gen.json"
if not coco_out_filename:
coco_out_filename = Path(json_file_path).stem + "_coco_gen.json"
# if no output directory provided: use VIA JSON parent directory
if not coco_out_dir:
coco_out_dir = str(Path(json_file_path).parent)
coco_out_fullpath = Path(coco_out_dir) / coco_out_filename

with open(coco_out_fullpath, "w") as f:
json.dump(coco_data, f)

return str(coco_out_fullpath)
42 changes: 42 additions & 0 deletions bboxes_labelling/combine_and_format_annotations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from pathlib import Path

from annotations_utils import combine_multiple_via_jsons, convert_via_json_to_coco


def main(
parent_dir_via_jsons,
via_default_dir,
via_project_name,
):
# Get list of VIA JSON files
list_json_files = [
x
for x in Path(parent_dir_via_jsons).glob("*")
if x.is_file() and str(x).endswith(".json")
]
list_json_files.sort()

# Combine VIA JSONS
json_out_fullpath = combine_multiple_via_jsons(
list_json_files,
via_default_dir=via_default_dir,
via_project_name=via_project_name,
)

# Convert to COCO
return convert_via_json_to_coco(json_out_fullpath)


if __name__ == "__main__":
parent_dir_via_jsons = (
"/Volumes/zoo/users/sminano/crabs_bboxes_labels/Aug2023/annotations"
)
via_default_dir = str(Path(parent_dir_via_jsons).parent)
via_project_name = "Aug2023"

coco_out_fullpath = main(
parent_dir_via_jsons,
via_default_dir,
via_project_name,
)
print(coco_out_fullpath)
91 changes: 0 additions & 91 deletions bboxes_labelling/combine_coco.py

This file was deleted.

Loading

0 comments on commit 99e1fd1

Please sign in to comment.