From 4a38befa7a0c6c656ff964b67d5bf10419b3fd69 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 1 Nov 2023 17:15:08 +0000 Subject: [PATCH 01/31] edit bash script for Aug23 day3 --- .../run_frame_extraction_array.sh | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/bboxes_labelling/cluster_bash_scripts/run_frame_extraction_array.sh b/bboxes_labelling/cluster_bash_scripts/run_frame_extraction_array.sh index 4452aaa9..eda65b68 100755 --- a/bboxes_labelling/cluster_bash_scripts/run_frame_extraction_array.sh +++ b/bboxes_labelling/cluster_bash_scripts/run_frame_extraction_array.sh @@ -10,11 +10,11 @@ #SBATCH -e slurm_array.%N.%A-%a.err #SBATCH --mail-type=ALL #SBATCH --mail-user=s.minano@ucl.ac.uk -#SBATCH --array=0-1%4 +#SBATCH --array=0-3%4 #------- # NOTE!! -# with "SBATCH --array=0-1%4" ---> runs n separate jobs, but not more than m at a time. +# with "SBATCH --array=0-n%m" ---> runs n separate jobs, but not more than m at a time. # the number of array jobs should match the number of input files # --------------------- @@ -29,9 +29,10 @@ module load SLEAP # TODO: have list here? # INPUT_DATA_LIST=($( Date: Wed, 1 Nov 2023 18:58:38 +0000 Subject: [PATCH 02/31] draft bash script that copies logs to destination and reencodes videos --- ...run_reencode_and_frame_extraction_array.sh | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100755 bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh diff --git a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh new file mode 100755 index 00000000..57576fa1 --- /dev/null +++ b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -0,0 +1,103 @@ +#!/bin/bash + +#SBATCH -p gpu # partition +#SBATCH -N 1 # number of nodes +#SBATCH --mem 64G # memory pool for all cores +#SBATCH -n 2 # number of cores +#SBATCH -t 3-00:00 # time (D-HH:MM) +#SBATCH --gres gpu:1 # request 1 GPU (of any kind) +#SBATCH -o slurm_array.%N.%A-%a.out +#SBATCH -e slurm_array.%N.%A-%a.err +#SBATCH --mail-type=ALL +#SBATCH --mail-user=s.minano@ucl.ac.uk +#SBATCH --array=0-0%5 + +#------- +# NOTE!! +# with "SBATCH --array=0-n%m" ---> runs n separate jobs, but not more than m at a time. +# the number of array jobs should match the number of input files + +# --------------------- +# Load required modules +# ---------------------- +module load SLEAP + +# ---------------------- +# Input data +# ---------------------- +# INPUT_DIR=/ceph/zoo/raw/CrabField/ramalhete_2021 +# # TODO: have list here? +# INPUT_DATA_LIST=($( Date: Wed, 1 Nov 2023 19:23:04 +0000 Subject: [PATCH 03/31] add reencoding step --- ...run_reencode_and_frame_extraction_array.sh | 52 +++++++++++++++++-- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh index 57576fa1..70cf7cc3 100755 --- a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -77,13 +77,28 @@ SCRIPT_DIR=$SCRATCH_PERSONAL_DIR/crabs-exploration/bboxes_labelling # ------------------- for i in {1..${SLURM_ARRAY_TASK_COUNT}} do + # Input video SAMPLE=${INPUT_DATA_LIST[${SLURM_ARRAY_TASK_ID}]} - - # reencode video? - - # run frame extraction algorithm + echo "Input video: $SAMPLE" + + # Reencode video? + # reencode input videos following SLEAP's recommendations + # https://sleap.ai/help.html#does-my-data-need-to-be-in-a-particular-format + filename_no_ext="$(basename "$SAMPLE" | sed 's/\(.*\)\..*/\1/')" # filename without extension + REENCODED_VIDEO_PATH="$OUTPUT_DIR/$OUTPUT_SUBDIR/$filename_no_ext"_re.mp4 + + ffmpeg -y -i "$SAMPLE" \ + -c:v libx264 \ + -pix_fmt yuv420p \ + -preset superfast \ + -crf 15 \ + $REENCODED_VIDEO_PATH + echo "Reencoded video: $REENCODED_VIDEO_PATH" + echo "--------" + + # Run frame extraction algorithm on reencoded video python $SCRIPT_DIR/extract_frames_to_label_w_sleap.py \ - $SAMPLE \ + $REENCODED_VIDEO_PATH \ --output_path $OUTPUT_DIR \ --output_subdir $OUTPUT_SUBDIR \ --video_extensions $PARAM_VIDEO_EXT \ @@ -94,6 +109,15 @@ do --per_cluster $PARAM_PER_CLUSTER \ --compute_features_per_video + echo "Frames extracted from video: $REENCODED_VIDEO_PATH" + echo "--------" + + + # # Delete reencoded video? + # rm $REENCODED_VIDEO_NAME + # echo "Deleted reencoded video: $REENCODED_VIDEO_NAME" + # echo "--------" + # Move logs for this job to subdir with extracted frames # TODO: ideally these are moved also if frame extraction fails mv slurm_array.$SLURMD_NODENAME.$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID.err /$LOG_DIR @@ -101,3 +125,21 @@ do done +# # reencode input videos following SLEAP's recommendations +# # https://sleap.ai/help.html#does-my-data-need-to-be-in-a-particular-format +# for i in {1..${SLURM_ARRAY_TASK_COUNT}} +# do +# FILEPATH=${INPUT_DATA_LIST[${SLURM_ARRAY_TASK_ID}]} +# filename_no_ext="$(basename "$FILEPATH" | sed 's/\(.*\)\..*/\1/')" # filename without extension +# echo "Input video: $FILEPATH" + +# ffmpeg -y -i "$FILEPATH" \ +# -c:v libx264 \ +# -pix_fmt yuv420p \ +# -preset superfast \ +# -crf 15 \ +# "$OUTPUT_DIR/$OUTPUT_SUBDIR/$filename_no_ext.mp4" + +# echo "Reencoded video: $OUTPUT_DIR/$OUTPUT_SUBDIR/$filename_no_ext.mp4" +# echo "---" +# done \ No newline at end of file From f2ada6f858694c1394e651f8311e380e4065659f Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 2 Nov 2023 13:29:51 +0000 Subject: [PATCH 04/31] add reencoding step and log ffmpeg params (WIP) --- ...run_reencode_and_frame_extraction_array.sh | 55 ++++++++----------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh index 70cf7cc3..b6e3f49e 100755 --- a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -29,25 +29,31 @@ module load SLEAP # # TODO: have list here? # INPUT_DATA_LIST=($( Date: Thu, 2 Nov 2023 13:30:21 +0000 Subject: [PATCH 05/31] frame extraction with logs but without reencoding --- .../run_frame_extraction_array.sh | 49 +++++++++++++------ 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/bboxes_labelling/cluster_bash_scripts/run_frame_extraction_array.sh b/bboxes_labelling/cluster_bash_scripts/run_frame_extraction_array.sh index eda65b68..169801c0 100755 --- a/bboxes_labelling/cluster_bash_scripts/run_frame_extraction_array.sh +++ b/bboxes_labelling/cluster_bash_scripts/run_frame_extraction_array.sh @@ -10,7 +10,7 @@ #SBATCH -e slurm_array.%N.%A-%a.err #SBATCH --mail-type=ALL #SBATCH --mail-user=s.minano@ucl.ac.uk -#SBATCH --array=0-3%4 +#SBATCH --array=0-9%5 #------- # NOTE!! @@ -29,24 +29,30 @@ module load SLEAP # TODO: have list here? # INPUT_DATA_LIST=($( Date: Mon, 13 Nov 2023 16:58:00 +0000 Subject: [PATCH 06/31] reencoding and frame extraction for day2 --- ...run_reencode_and_frame_extraction_array.sh | 43 +++++++++++++------ 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh index b6e3f49e..9ced9686 100755 --- a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -10,7 +10,7 @@ #SBATCH -e slurm_array.%N.%A-%a.err #SBATCH --mail-type=ALL #SBATCH --mail-user=s.minano@ucl.ac.uk -#SBATCH --array=0-0%5 +#SBATCH --array=0-11%5 #------- # NOTE!! @@ -25,11 +25,22 @@ module load SLEAP # ---------------------- # Input data # ---------------------- -# INPUT_DIR=/ceph/zoo/raw/CrabField/ramalhete_2021 -# # TODO: have list here? +# INPUT_DIR=/ceph/zoo/raw/CrabField/ramalhete_2023 +# # TODO: have list here? change to directory? # INPUT_DATA_LIST=($( Date: Wed, 15 Nov 2023 10:43:21 +0000 Subject: [PATCH 07/31] add loops to logs. change filename of reencoded logs. --- .../run_reencode_and_frame_extraction_array.sh | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh index 9ced9686..51b55fd9 100755 --- a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -136,18 +136,15 @@ do # copy .err file to go with reencoded video too # TODO: make a nicer log, and not dependant on whether frame extract is OK! - # filename: keep only job ID and name of reencoded video - # TODO replace by for loop in {err, out} - cp slurm_array.$SLURMD_NODENAME.$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID.err \ - /$REENCODED_VIDEOS_SUBDIR/slurm_array."$filename_no_ext"_RE.$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID.err - cp slurm_array.$SLURMD_NODENAME.$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID.out \ - /$REENCODED_VIDEOS_SUBDIR/slurm_array."$filename_no_ext"_RE.$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID.out + # filename: {reencoded video name}.{slurm_array}.{slurm_job_id} + for ext in err out + cp slurm_array.$SLURMD_NODENAME.$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID.$ext \ + /$REENCODED_VIDEOS_SUBDIR/"$filename_no_ext"_RE.slurm_array.$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID.$ext # Move logs for this job to subdir with extracted frames # TODO: ideally these are moved also if frame extraction fails! - # TODO replace by for loop in {err, out} - mv slurm_array.$SLURMD_NODENAME.$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID.err /$LOG_DIR - mv slurm_array.$SLURMD_NODENAME.$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID.out /$LOG_DIR + for ext in err out + mv slurm_array.$SLURMD_NODENAME.$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID.$ext /$LOG_DIR done From 18f9093bec505a4a188241206c3735f1e61251f7 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 15 Nov 2023 10:51:07 +0000 Subject: [PATCH 08/31] remove parent directory name from extracted image filename --- bboxes_labelling/extract_frames_to_label_w_sleap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bboxes_labelling/extract_frames_to_label_w_sleap.py b/bboxes_labelling/extract_frames_to_label_w_sleap.py index 8951e4b2..63621345 100755 --- a/bboxes_labelling/extract_frames_to_label_w_sleap.py +++ b/bboxes_labelling/extract_frames_to_label_w_sleap.py @@ -325,7 +325,7 @@ def extract_frames_to_label_from_video( # file naming format: parentdir_videoname_frame_XXX.png else: file_path = video_output_dir / Path( - f"{Path(vid_str).parent.stem}_" + # f"{Path(vid_str).parent.stem}_" #---- f"{Path(vid_str).stem}_" f"frame_{frame_idx:06d}.png", ) From 2c384370fe6dd3ed1a445b5a9808399138dcd87c Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 15 Nov 2023 11:14:41 +0000 Subject: [PATCH 09/31] script for day3 job --- ...run_reencode_and_frame_extraction_array.sh | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh index 51b55fd9..58d81485 100755 --- a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -29,31 +29,32 @@ module load SLEAP # # TODO: have list here? change to directory? # INPUT_DATA_LIST=($( Date: Wed, 15 Nov 2023 11:17:43 +0000 Subject: [PATCH 10/31] fix for loop --- .../run_reencode_and_frame_extraction_array.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh index 58d81485..6fbd6482 100755 --- a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -139,13 +139,17 @@ do # TODO: make a nicer log, and not dependant on whether frame extract is OK! # filename: {reencoded video name}.{slurm_array}.{slurm_job_id} for ext in err out + do cp slurm_array.$SLURMD_NODENAME.$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID.$ext \ /$REENCODED_VIDEOS_SUBDIR/"$filename_no_ext"_RE.slurm_array.$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID.$ext + done # Move logs for this job to subdir with extracted frames # TODO: ideally these are moved also if frame extraction fails! for ext in err out + do mv slurm_array.$SLURMD_NODENAME.$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID.$ext /$LOG_DIR + done done From a645a3ba0a38646ca022554725d32bb420e5c351 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 16 Nov 2023 12:03:38 +0000 Subject: [PATCH 11/31] repeat reencoded failed jobs --- .../run_reencode_and_frame_extraction_array.sh | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh index 6fbd6482..0784fd6c 100755 --- a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -10,7 +10,7 @@ #SBATCH -e slurm_array.%N.%A-%a.err #SBATCH --mail-type=ALL #SBATCH --mail-user=s.minano@ucl.ac.uk -#SBATCH --array=0-11%5 +#SBATCH --array=0-2%5 #------- # NOTE!! @@ -29,18 +29,9 @@ module load SLEAP # # TODO: have list here? change to directory? # INPUT_DATA_LIST=($( Date: Fri, 17 Nov 2023 09:58:20 +0000 Subject: [PATCH 12/31] Sep2023 day4 job --- .../run_reencode_and_frame_extraction_array.sh | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh index 0784fd6c..5279aac1 100755 --- a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -10,7 +10,7 @@ #SBATCH -e slurm_array.%N.%A-%a.err #SBATCH --mail-type=ALL #SBATCH --mail-user=s.minano@ucl.ac.uk -#SBATCH --array=0-2%5 +#SBATCH --array=0-9%5 #------- # NOTE!! @@ -29,9 +29,16 @@ module load SLEAP # # TODO: have list here? change to directory? # INPUT_DATA_LIST=($( Date: Fri, 17 Nov 2023 10:28:19 +0000 Subject: [PATCH 13/31] repeat day 1-04 and 05 on reencoded with less frames --- .../run_reencode_and_frame_extraction_array.sh | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh index 5279aac1..49676264 100755 --- a/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bboxes_labelling/cluster_bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -10,7 +10,7 @@ #SBATCH -e slurm_array.%N.%A-%a.err #SBATCH --mail-type=ALL #SBATCH --mail-user=s.minano@ucl.ac.uk -#SBATCH --array=0-9%5 +#SBATCH --array=0-3%5 #------- # NOTE!! @@ -29,16 +29,10 @@ module load SLEAP # # TODO: have list here? change to directory? # INPUT_DATA_LIST=($( Date: Mon, 20 Nov 2023 10:26:56 +0100 Subject: [PATCH 14/31] add clarification for array job syntax --- bash_scripts/run_frame_extraction_array.sh | 5 +++-- bash_scripts/run_reencode_and_frame_extraction_array.sh | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/bash_scripts/run_frame_extraction_array.sh b/bash_scripts/run_frame_extraction_array.sh index a2b0133a..a742d4b9 100755 --- a/bash_scripts/run_frame_extraction_array.sh +++ b/bash_scripts/run_frame_extraction_array.sh @@ -13,9 +13,10 @@ #SBATCH --array=0-9%5 #------- -# NOTE!! -# with "SBATCH --array=0-n%m" ---> runs n separate jobs, but not more than m at a time. +# NOTE +# with "SBATCH --array=0-n%m" ---> runs n+1 separate jobs, but not more than m at a time. # the number of array jobs should match the number of input files +#------- # --------------------- # Load required modules diff --git a/bash_scripts/run_reencode_and_frame_extraction_array.sh b/bash_scripts/run_reencode_and_frame_extraction_array.sh index dfd92a17..1de44e55 100755 --- a/bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -13,9 +13,10 @@ #SBATCH --array=0-3%5 #------- -# NOTE!! -# with "SBATCH --array=0-n%m" ---> runs n separate jobs, but not more than m at a time. +# NOTE +# with "SBATCH --array=0-n%m" ---> runs n+1 separate jobs, but not more than m at a time. # the number of array jobs should match the number of input files +#------- # --------------------- # Load required modules From cbaff3720742ff57f9e27c0bc4d1b68455854f17 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 20 Nov 2023 10:28:03 +0100 Subject: [PATCH 15/31] remove logs comment --- bash_scripts/run_frame_extraction_array.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/bash_scripts/run_frame_extraction_array.sh b/bash_scripts/run_frame_extraction_array.sh index a742d4b9..cbf1a2b4 100755 --- a/bash_scripts/run_frame_extraction_array.sh +++ b/bash_scripts/run_frame_extraction_array.sh @@ -51,9 +51,6 @@ OUTPUT_SUBDIR="Sep2023_day1" # SLURM logs dir LOG_DIR=$OUTPUT_DIR/$OUTPUT_SUBDIR/logs mkdir -p $LOG_DIR # create if it doesnt exist -# can I set SLURM logs location here? -# srun -e slurm_array.$SLURMD_NODENAME.$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID.err - # ---------------------- # parameters From 5a9d5cb7f13566a7b1d57cd99555c5d2c65dc363 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 20 Nov 2023 10:29:26 +0100 Subject: [PATCH 16/31] remove parent directory from name of extracted frame --- crabs/bboxes_labelling/extract_frames_to_label_w_sleap.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crabs/bboxes_labelling/extract_frames_to_label_w_sleap.py b/crabs/bboxes_labelling/extract_frames_to_label_w_sleap.py index 8046d0d5..38fbf42c 100755 --- a/crabs/bboxes_labelling/extract_frames_to_label_w_sleap.py +++ b/crabs/bboxes_labelling/extract_frames_to_label_w_sleap.py @@ -325,12 +325,10 @@ def extract_frames_to_label_from_video( raise KeyError(msg) # If read successfully: save to file - # file naming format: parentdir_videoname_frame_XXX.png + # file naming format: videoname_frame_XXX.png else: file_path = video_output_dir / Path( - # f"{Path(vid_str).parent.stem}_" #---- - f"{Path(vid_str).stem}_" - f"frame_{frame_idx:06d}.png", + f"{Path(vid_str).stem}_" f"frame_{frame_idx:06d}.png", ) img_saved = cv2.imwrite(str(file_path), frame) if img_saved: From 7b6b31853cfe028de404b280b960b8c5b0027339 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 20 Nov 2023 10:30:41 +0100 Subject: [PATCH 17/31] clarify TODO about all files in directory --- bash_scripts/run_reencode_and_frame_extraction_array.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bash_scripts/run_reencode_and_frame_extraction_array.sh b/bash_scripts/run_reencode_and_frame_extraction_array.sh index 1de44e55..6b6b489d 100755 --- a/bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -26,8 +26,8 @@ module load SLEAP # ---------------------- # Input data # ---------------------- +# TODO: change to all files in a directory? # INPUT_DIR=/ceph/zoo/raw/CrabField/ramalhete_2023 -# # TODO: have list here? change to directory? # INPUT_DATA_LIST=($( Date: Mon, 20 Nov 2023 11:43:27 +0000 Subject: [PATCH 18/31] clarify sbatch syntax for array job --- bash_scripts/run_reencode_and_frame_extraction_array.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bash_scripts/run_reencode_and_frame_extraction_array.sh b/bash_scripts/run_reencode_and_frame_extraction_array.sh index 1de44e55..dfd92a17 100755 --- a/bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -13,10 +13,9 @@ #SBATCH --array=0-3%5 #------- -# NOTE -# with "SBATCH --array=0-n%m" ---> runs n+1 separate jobs, but not more than m at a time. +# NOTE!! +# with "SBATCH --array=0-n%m" ---> runs n separate jobs, but not more than m at a time. # the number of array jobs should match the number of input files -#------- # --------------------- # Load required modules From c83d4d036a6e78f44b869e8bf7406cac8f02e376 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 20 Nov 2023 11:44:41 +0000 Subject: [PATCH 19/31] add option to reencode or not the videos --- ...run_reencode_and_frame_extraction_array.sh | 102 ++++++++++-------- 1 file changed, 59 insertions(+), 43 deletions(-) diff --git a/bash_scripts/run_reencode_and_frame_extraction_array.sh b/bash_scripts/run_reencode_and_frame_extraction_array.sh index dfd92a17..065629fd 100755 --- a/bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -29,35 +29,40 @@ module load SLEAP # # TODO: have list here? change to directory? # INPUT_DATA_LIST=($( Date: Mon, 20 Nov 2023 11:45:00 +0000 Subject: [PATCH 20/31] print to log if frame extraction fails --- bash_scripts/run_reencode_and_frame_extraction_array.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bash_scripts/run_reencode_and_frame_extraction_array.sh b/bash_scripts/run_reencode_and_frame_extraction_array.sh index 065629fd..66e845af 100755 --- a/bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -137,7 +137,11 @@ do --per_cluster $PARAM_PER_CLUSTER \ --compute_features_per_video - echo "Frames extracted from video: $REENCODED_VIDEO_PATH" + if [ "$?" -ne 0 ]; then + echo "Frame extraction failed! Please check .err log" + else + echo "Frames extracted from video: $FRAME_EXTRACTION_INPUT_VIDEO" + fi echo "--------" # Reencoded videos log From cba1639abab1739a0824d6312efd078fc035059e Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 20 Nov 2023 11:45:15 +0000 Subject: [PATCH 21/31] fix path for new structure --- bash_scripts/run_reencode_and_frame_extraction_array.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bash_scripts/run_reencode_and_frame_extraction_array.sh b/bash_scripts/run_reencode_and_frame_extraction_array.sh index 66e845af..96c163d0 100755 --- a/bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -85,7 +85,7 @@ fi # ---------------------- # assumes repo located at '/ceph/scratch/sminano' SCRATCH_PERSONAL_DIR=/ceph/scratch/sminano -SCRIPT_DIR=$SCRATCH_PERSONAL_DIR/crabs-exploration/bboxes_labelling +SCRIPT_DIR=$SCRATCH_PERSONAL_DIR/crabs-exploration/crabs/bboxes_labelling # ------------------- # Run python script From 5977bd360b41e7b333e34a2b1191df8ab7705b16 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 20 Nov 2023 11:45:39 +0000 Subject: [PATCH 22/31] clarify reencoding is optional --- bash_scripts/run_reencode_and_frame_extraction_array.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100755 => 100644 bash_scripts/run_reencode_and_frame_extraction_array.sh diff --git a/bash_scripts/run_reencode_and_frame_extraction_array.sh b/bash_scripts/run_reencode_and_frame_extraction_array.sh old mode 100755 new mode 100644 index 96c163d0..002891a0 --- a/bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -97,7 +97,7 @@ do echo "Input video: $SAMPLE" echo "--------" - # Reencode video + # Reencode video if required # following SLEAP's recommendations # https://sleap.ai/help.html#does-my-data-need-to-be-in-a-particular-format if [ "$flag_reencode_input_videos"=true ] ; then From e16ac59a87cab963f79999ecb2ad1966bd15e3cf Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 20 Nov 2023 11:48:21 +0000 Subject: [PATCH 23/31] move check earlier --- ...run_reencode_and_frame_extraction_array.sh | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/bash_scripts/run_reencode_and_frame_extraction_array.sh b/bash_scripts/run_reencode_and_frame_extraction_array.sh index 002891a0..eeca6622 100644 --- a/bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -36,6 +36,16 @@ INPUT_DATA_LIST=( flag_reencode_input_videos=false reencoded_extension=mp4 +# --------------------------- +# Check number of array jobs +# ------------------------------ +# Check len(list of input data) matches max SLURM_ARRAY_TASK_COUNT +# if not, exit +if [[ $SLURM_ARRAY_TASK_COUNT -ne ${#INPUT_DATA_LIST[@]} ]]; then + echo "The number of array tasks does not match the number of inputs" + exit 1 +fi + # ---------------------- # Output data location # ---------------------- @@ -70,16 +80,6 @@ PARAM_N_CLUSTERS=5 PARAM_PER_CLUSTER=4 -# --------------------------- -# Check number of array jobs -# ------------------------------ -# Check len(list of input data) matches max SLURM_ARRAY_TASK_COUNT -# if not, exit -if [[ $SLURM_ARRAY_TASK_COUNT -ne ${#INPUT_DATA_LIST[@]} ]]; then - echo "The number of array tasks does not match the number of inputs" - exit 1 -fi - # ---------------------- # Script location # ---------------------- From 0bce7f272c7213727931c78eb5c933f3082ea619 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 20 Nov 2023 11:48:49 +0000 Subject: [PATCH 24/31] check just below input list --- .../run_reencode_and_frame_extraction_array.sh | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/bash_scripts/run_reencode_and_frame_extraction_array.sh b/bash_scripts/run_reencode_and_frame_extraction_array.sh index eeca6622..e64db728 100644 --- a/bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -31,14 +31,6 @@ module load SLEAP INPUT_DATA_LIST=( "/ceph/zoo/raw/CrabField/ramalhete_2023/04.09.2023-Day1/04.09.2023-05-Left.mp4" ) - -# set whether to reencode input videos or not -flag_reencode_input_videos=false -reencoded_extension=mp4 - -# --------------------------- -# Check number of array jobs -# ------------------------------ # Check len(list of input data) matches max SLURM_ARRAY_TASK_COUNT # if not, exit if [[ $SLURM_ARRAY_TASK_COUNT -ne ${#INPUT_DATA_LIST[@]} ]]; then @@ -46,6 +38,10 @@ if [[ $SLURM_ARRAY_TASK_COUNT -ne ${#INPUT_DATA_LIST[@]} ]]; then exit 1 fi +# set whether to reencode input videos or not +flag_reencode_input_videos=false +reencoded_extension=mp4 + # ---------------------- # Output data location # ---------------------- From 02fc3a6c3c234fc99799af720b38449579bd4204 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 20 Nov 2023 11:49:07 +0000 Subject: [PATCH 25/31] fix number of array jobs --- bash_scripts/run_reencode_and_frame_extraction_array.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bash_scripts/run_reencode_and_frame_extraction_array.sh b/bash_scripts/run_reencode_and_frame_extraction_array.sh index e64db728..4ae714bf 100644 --- a/bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -10,7 +10,7 @@ #SBATCH -e slurm_array.%N.%A-%a.err #SBATCH --mail-type=ALL #SBATCH --mail-user=s.minano@ucl.ac.uk -#SBATCH --array=0-3%5 +#SBATCH --array=0-1%5 #------- # NOTE!! From 622d2e48b13479e8fa501f403f2d9838305af9a2 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 20 Nov 2023 11:49:50 +0000 Subject: [PATCH 26/31] actually fix number of array jobs... --- bash_scripts/run_reencode_and_frame_extraction_array.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bash_scripts/run_reencode_and_frame_extraction_array.sh b/bash_scripts/run_reencode_and_frame_extraction_array.sh index 4ae714bf..5247cdfc 100644 --- a/bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -10,7 +10,7 @@ #SBATCH -e slurm_array.%N.%A-%a.err #SBATCH --mail-type=ALL #SBATCH --mail-user=s.minano@ucl.ac.uk -#SBATCH --array=0-1%5 +#SBATCH --array=0-0%5 #------- # NOTE!! From b7934d1e483205e063c0fe62293a4284b8b0cd1f Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 20 Nov 2023 11:59:16 +0000 Subject: [PATCH 27/31] fix path and if statements --- .../run_reencode_and_frame_extraction_array.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bash_scripts/run_reencode_and_frame_extraction_array.sh b/bash_scripts/run_reencode_and_frame_extraction_array.sh index 5247cdfc..a937de90 100644 --- a/bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -29,7 +29,7 @@ module load SLEAP # # TODO: have list here? change to directory? # INPUT_DATA_LIST=($( Date: Mon, 20 Nov 2023 12:08:45 +0000 Subject: [PATCH 28/31] derive extension from input video --- .../run_reencode_and_frame_extraction_array.sh | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/bash_scripts/run_reencode_and_frame_extraction_array.sh b/bash_scripts/run_reencode_and_frame_extraction_array.sh index a937de90..ddc2ca48 100644 --- a/bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -31,6 +31,8 @@ module load SLEAP INPUT_DATA_LIST=( "/ceph/zoo/users/sminano/crabs_reencoded_videos/Sep2023_day1_reencoded/04.09.2023-05-Left_RE.mp4" ) +# "/ceph/zoo/users/sminano/crabs_reencoded_videos/Sep2023_day4_reencoded/07.09.2023-01-Right_RE.mp4" + # Check len(list of input data) matches max SLURM_ARRAY_TASK_COUNT # if not, exit if [[ $SLURM_ARRAY_TASK_COUNT -ne ${#INPUT_DATA_LIST[@]} ]]; then @@ -40,7 +42,7 @@ fi # set whether to reencode input videos or not flag_reencode_input_videos=false -reencoded_extension=mp4 +# reencoded_extension=mp4 # ---------------------- # Output data location @@ -64,11 +66,11 @@ fi # Frame extraction parameters # ----------------------------------- # extension of the videos from which frames are extracted! -if [ "$flag_reencode_input_videos" = true ] ; then - PARAM_VIDEO_EXT=$reencoded_extension -else - PARAM_VIDEO_EXT=MOV # TODO: derive video extension if not provided? -fi +# if [ "$flag_reencode_input_videos" = true ] ; then +# PARAM_VIDEO_EXT=$reencoded_extension +# else +# PARAM_VIDEO_EXT=MOV # TODO: derive video extension if not provided? +# fi PARAM_INI_SAMPLES=500 PARAM_SCALE=0.5 PARAM_N_COMPONENTS=5 @@ -120,6 +122,10 @@ do FRAME_EXTRACTION_INPUT_VIDEO=$SAMPLE fi + # Get extension of input video + filename=$(basename -- "$FRAME_EXTRACTION_INPUT_VIDEO") + PARAM_VIDEO_EXT="${filename##*.}" + # Run frame extraction algorithm on video python $SCRIPT_DIR/extract_frames_to_label_w_sleap.py \ $FRAME_EXTRACTION_INPUT_VIDEO \ From bbad7de62d8b06bd6496d0fa63640992e62a8ccc Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 20 Nov 2023 12:45:54 +0000 Subject: [PATCH 29/31] bash script for day4 01-Right rep --- ...run_reencode_and_frame_extraction_array.sh | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/bash_scripts/run_reencode_and_frame_extraction_array.sh b/bash_scripts/run_reencode_and_frame_extraction_array.sh index ddc2ca48..9d4036c3 100644 --- a/bash_scripts/run_reencode_and_frame_extraction_array.sh +++ b/bash_scripts/run_reencode_and_frame_extraction_array.sh @@ -29,9 +29,8 @@ module load SLEAP # # TODO: have list here? change to directory? # INPUT_DATA_LIST=($( Date: Mon, 20 Nov 2023 16:21:08 +0100 Subject: [PATCH 30/31] delete frame extraction only script and frame extraction local --- bash_scripts/run_frame_extraction_array.sh | 106 --------------------- bash_scripts/run_frame_extraction_local.sh | 45 --------- 2 files changed, 151 deletions(-) delete mode 100755 bash_scripts/run_frame_extraction_array.sh delete mode 100755 bash_scripts/run_frame_extraction_local.sh diff --git a/bash_scripts/run_frame_extraction_array.sh b/bash_scripts/run_frame_extraction_array.sh deleted file mode 100755 index cbf1a2b4..00000000 --- a/bash_scripts/run_frame_extraction_array.sh +++ /dev/null @@ -1,106 +0,0 @@ -#!/bin/bash - -#SBATCH -p gpu # partition -#SBATCH -N 1 # number of nodes -#SBATCH --mem 64G # memory pool for all cores -#SBATCH -n 2 # number of cores -#SBATCH -t 3-00:00 # time (D-HH:MM) -#SBATCH --gres gpu:1 # request 1 GPU (of any kind) -#SBATCH -o slurm_array.%N.%A-%a.out -#SBATCH -e slurm_array.%N.%A-%a.err -#SBATCH --mail-type=ALL -#SBATCH --mail-user=s.minano@ucl.ac.uk -#SBATCH --array=0-9%5 - -#------- -# NOTE -# with "SBATCH --array=0-n%m" ---> runs n+1 separate jobs, but not more than m at a time. -# the number of array jobs should match the number of input files -#------- - -# --------------------- -# Load required modules -# ---------------------- -module load SLEAP - -# ---------------------- -# Input data -# ---------------------- -# INPUT_DIR=/ceph/zoo/raw/CrabField/ramalhete_2021 -# TODO: have list here? -# INPUT_DATA_LIST=($( Date: Mon, 20 Nov 2023 16:56:37 +0100 Subject: [PATCH 31/31] a draft bash script for running frame extraction with entry points --- .../run_frame_extraction_w_entry_points.sh | 149 ++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 bash_scripts/run_frame_extraction_w_entry_points.sh diff --git a/bash_scripts/run_frame_extraction_w_entry_points.sh b/bash_scripts/run_frame_extraction_w_entry_points.sh new file mode 100644 index 00000000..2667a571 --- /dev/null +++ b/bash_scripts/run_frame_extraction_w_entry_points.sh @@ -0,0 +1,149 @@ +#!/bin/bash + +#SBATCH -p gpu # partition +#SBATCH -N 1 # number of nodes +#SBATCH --mem 64G # memory pool for all cores +#SBATCH -n 2 # number of cores +#SBATCH -t 3-00:00 # time (D-HH:MM) +#SBATCH --gres gpu:1 # request 1 GPU (of any kind) +#SBATCH -o slurm_array.%N.%A-%a.out +#SBATCH -e slurm_array.%N.%A-%a.err +#SBATCH --mail-type=ALL +#SBATCH --mail-user=s.minano@ucl.ac.uk + +# Run this script as +# sbatch --array=0-n%m run_frame_extraction_w_entry_points.sh --config=input.json +# +# The idea is that this script changes as little as possible! +# Instead the input.json is the only file modified, and its content is printed to the logs +# +# NOTE for the optional argument "-array=0-n%m": +# runs n separate jobs, but not more than m at a time. +# the number of array jobs should match the number of input files + + +# --------------------- +# Create conda env +# ---------------------- +# conda env create +# git clone repo +# pip install package + + + +# ---------------------- +# Input config +# ---------------------- +# Print full json file to logs +# https://www.baeldung.com/linux/jq-command-json#1-prettify-json + +# Check json +# Some config fields are mandatory + + +# Define defaults for optional fields +# To use if not defined in config +LOG_DIR=$OUTPUT_DIR/$OUTPUT_SUBDIR/logs +REENCODED_VIDEOS_SUBDIR=$REENCODED_VIDEOS_DIR/$OUTPUT_SUBDIR +# flag_reencode_input_videos + +# ---------------------- +# Input data +# ---------------------- +# Read input videos from json file +# https://jqlang.github.io/jq/ +# INPUT_DATA_LIST=() + +# Check len(list of input data) matches max SLURM_ARRAY_TASK_COUNT +# if not, exit +if [[ $SLURM_ARRAY_TASK_COUNT -ne ${#INPUT_DATA_LIST[@]} ]]; then + echo "The number of array tasks does not match the number of inputs" + exit 1 +fi + + +# ---------------------- +# Output locations +# ---------------------- +# Read output dir and subdir from json +# OUTPUT_DIR=/ceph/zoo/users/sminano/crabs_bboxes_labels +# OUTPUT_SUBDIR="Sep2023_day4_reencoded" + +# Create location of SLURM logs +mkdir -p $LOG_DIR # create if it doesnt exist + +# read reencoding flag from json +# flag_reencode_input_videos +# https://stackoverflow.com/a/28185962 + +# Define location of reencoded videos if required +if [ "$flag_reencode_input_videos" = true ] ; then + # Read reencoded dir from json + # REENCODED_VIDEOS_DIR=/ceph/zoo/users/sminano/crabs_reencoded_videos + # REENCODED_VIDEOS_SUBDIR=$REENCODED_VIDEOS_DIR/$OUTPUT_SUBDIR + mkdir -p $REENCODED_VIDEOS_SUBDIR # create if it doesnt exist +fi + + +# ------------------------ +# Command line tool +# ------------------------ +for i in {1..${SLURM_ARRAY_TASK_COUNT}} +do + # Input video + SAMPLE=${INPUT_DATA_LIST[${SLURM_ARRAY_TASK_ID}]} + echo "Input video: $SAMPLE" + echo "--------" + + # -------------------------- + # Reencode video - if required (CLI tool) + # -------------------------- + echo "Reencoding ..." + reencode-video ... + + # # Check status + # if [ "$?" -ne 0 ]; then + # echo "Reencoding failed! Please check .err log" + # else + # echo "Reencoded video: $REENCODED_VIDEO_PATH" + # fi + # echo "--------" + + + # ------------------- + # Extract frames + # ------------------- + echo Extracting frames + extract-frames ... + + # # Check status + # if [ "$?" -ne 0 ]; then + # echo "Frame extraction failed! Please check .err log" + # else + # echo "Frames extracted from video: $FRAME_EXTRACTION_INPUT_VIDEO" + # fi + # echo "--------" + + + # ------------------- + # Logs + # ------------------- + # Reencoded videos log + # copy .err file to go with reencoded video too if required + # filename: {reencoded video name}.{slurm_array}.{slurm_job_id} + # TODO: make a nicer log + if [ "$flag_reencode_input_videos" = true ] ; then + for ext in err out + do + cp slurm_array.$SLURMD_NODENAME.$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID.$ext \ + /$REENCODED_VIDEOS_SUBDIR/"$filename_no_ext"_RE.slurm_array.$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID.$ext + done + fi + + # Frame extraction logs + # Move logs for this job to subdir with extracted frames + for ext in err out + do + mv slurm_array.$SLURMD_NODENAME.$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID.$ext /$LOG_DIR + done +done