Skip to content

Commit

Permalink
Frame extraction with reencoding (#78)
Browse files Browse the repository at this point in the history
* edit bash script for Aug23 day3

* draft bash script that copies logs to destination and reencodes videos

* add reencoding step

* add reencoding step and log ffmpeg params (WIP)

* frame extraction with logs but without reencoding

* reencoding and frame extraction for day2

* add loops to logs. change filename of reencoded logs.

* remove parent directory name from extracted image filename

* script for day3 job

* fix for loop

* repeat reencoded failed jobs

* Sep2023 day4 job

* repeat day 1-04 and 05 on reencoded with less frames

* add clarification for array job syntax

* remove logs comment

* remove parent directory from name of extracted frame

* clarify TODO about all files in directory

* clarify sbatch syntax for array job

* add option to reencode or not the videos

* print to log if frame extraction fails

* fix path for new structure

* clarify reencoding is optional

* move check earlier

* check just below input list

* fix number of array jobs

* actually fix number of array jobs...

* fix path and if statements

* derive extension from input video

* bash script for day4 01-Right rep

* delete frame extraction only script and frame extraction local

* clarify array jobs syntax

* rename reencoding only script

* add couple of zeros to frame number when saving file

* use parameter expansion for move command
  • Loading branch information
sfmig authored Nov 22, 2023
1 parent 72e22c1 commit aa2fca4
Show file tree
Hide file tree
Showing 5 changed files with 164 additions and 140 deletions.
91 changes: 0 additions & 91 deletions bash_scripts/run_frame_extraction_array.sh

This file was deleted.

45 changes: 0 additions & 45 deletions bash_scripts/run_frame_extraction_local.sh

This file was deleted.

162 changes: 162 additions & 0 deletions bash_scripts/run_reencode_and_frame_extraction_array.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#!/bin/bash

#SBATCH -p gpu # partition
#SBATCH -N 1 # number of nodes
#SBATCH --mem 64G # memory pool for all cores
#SBATCH -n 2 # number of cores
#SBATCH -t 3-00:00 # time (D-HH:MM)
#SBATCH --gres gpu:1 # request 1 GPU (of any kind)
#SBATCH -o slurm_array.%N.%A-%a.out
#SBATCH -e slurm_array.%N.%A-%a.err
#SBATCH --mail-type=ALL
#SBATCH --mail-user=user@example.com
#SBATCH --array=0-0%5


# NOTE on the SBATCH directives above
# - "--mail-user" holds a placeholder address: replace it with the address
#   that should receive the job notifications before submitting.
# - "-o" / "-e" name the log files of each array task as
#   slurm_array.<node name>.<array job ID>-<array task ID>.{out,err}
#   The end of this script copies/moves the logs using that same pattern,
#   so keep both in sync.
#
# NOTE on SBATCH syntax for array jobs
# "SBATCH --array=0-n%m" ---> creates n+1 array tasks (with task IDs 0, 1, ..., n),
# of which no more than m run at the same time.
# - the number of array tasks must match the number of input files
#   (use "--array=0-(N-1)" for N input files)
# - the task IDs must start at 0, because the task ID is used as the
#   (0-based) index into the list of input files

# ---------------------
# Load required modules
# ---------------------
module load SLEAP

# ----------------------
# Input data
# ----------------------
# Videos to process, one entry per SLURM array task.
# TODO: change to all files in a directory?
# INPUT_DIR=/ceph/zoo/raw/CrabField/ramalhete_2023
# INPUT_DATA_LIST=($(<input.list))
INPUT_DATA_LIST=(
  "/ceph/zoo/users/sminano/crabs_reencoded_videos/Sep2023_day4_reencoded/07.09.2023-01-Right_RE.mp4"
)

# Sanity check: the array job must have been submitted with exactly as many
# tasks as there are input videos. Stop right away otherwise.
if (( SLURM_ARRAY_TASK_COUNT != ${#INPUT_DATA_LIST[@]} )); then
  echo "The number of array tasks does not match the number of inputs"
  exit 1
fi

# ----------------------
# Video reencoding
# ----------------------
# Switch for the optional reencoding step: when set to "true" the input
# video is reencoded before frame extraction; any other value skips it.
flag_reencode_input_videos=false

# ----------------------
# Output data location
# ----------------------
# Extracted frames are passed to the python script as
# --output_path / --output_subdir
# TODO: derive subdir name from parent dir
OUTPUT_DIR=/ceph/zoo/users/sminano/crabs_bboxes_labels
OUTPUT_SUBDIR="Sep2023_day4_reencoded"

# SLURM logs are collected under the output subdirectory
LOG_DIR="${OUTPUT_DIR}/${OUTPUT_SUBDIR}/logs"
mkdir -p "${LOG_DIR}" # no-op if it already exists

# Destination of the reencoded videos (only defined when reencoding is enabled)
if [[ "${flag_reencode_input_videos}" == true ]]; then
  REENCODED_VIDEOS_DIR=/ceph/zoo/users/sminano/crabs_reencoded_videos
  REENCODED_VIDEOS_SUBDIR="${REENCODED_VIDEOS_DIR}/${OUTPUT_SUBDIR}"
  mkdir -p "${REENCODED_VIDEOS_SUBDIR}" # no-op if it already exists
fi

# ---------------------------------
# Frame extraction parameters
# ---------------------------------
# Forwarded to the python script as --initial_samples, --scale,
# --n_components, --n_clusters and --per_cluster respectively
PARAM_INI_SAMPLES=500
PARAM_SCALE=0.5
PARAM_N_COMPONENTS=5
PARAM_N_CLUSTERS=5
PARAM_PER_CLUSTER=4


# ----------------------
# Script location
# ----------------------
# The repository is expected to be cloned under '/ceph/scratch/sminano'
SCRATCH_PERSONAL_DIR=/ceph/scratch/sminano
SCRIPT_DIR="${SCRATCH_PERSONAL_DIR}/crabs-exploration/crabs/bboxes_labelling"

# -------------------
# Run python script
# -------------------
# Each SLURM array task processes exactly ONE video: the element of
# INPUT_DATA_LIST whose (0-based) index is SLURM_ARRAY_TASK_ID.
# There is deliberately no loop over the array tasks here: SLURM already runs
# this script once per task. (Note that a range such as {1..${N}} is never
# expanded by bash, because brace expansion happens before parameter
# expansion.)
#
# Steps:
#   1. optionally reencode the input video with ffmpeg
#   2. run the frame extraction python script on the (reencoded) video
#   3. copy/move the SLURM logs of this task next to the outputs
# The script exits with a non-zero status if step 1, 2 or the final log move
# failed, so that SLURM reports the task as failed.

# Overall status of this task (0 = success)
task_status=0

# Common prefix of the .out/.err log files of this task.
# It must match the pattern given to the "-o" / "-e" SBATCH directives.
slurm_log_prefix="slurm_array.${SLURMD_NODENAME}.${SLURM_ARRAY_JOB_ID}-${SLURM_ARRAY_TASK_ID}"

# Input video
SAMPLE="${INPUT_DATA_LIST[${SLURM_ARRAY_TASK_ID}]}"
echo "Input video: $SAMPLE"
echo "--------"

# Input filename without directory and without extension
sample_basename="$(basename -- "$SAMPLE")"
filename_no_ext="${sample_basename%.*}"

# An empty entry means the task ID does not index the input list
# (e.g. the array indices do not start at 0)
if [[ -z "$SAMPLE" ]]; then
  echo "No input video defined for array task ID '${SLURM_ARRAY_TASK_ID}'" >&2
  task_status=1
fi

# Reencode video if required
# following SLEAP's recommendations
# https://sleap.ai/help.html#does-my-data-need-to-be-in-a-particular-format
if (( task_status == 0 )); then
  if [[ "$flag_reencode_input_videos" == true ]]; then
    echo "Rencoding ...."

    # path to reencoded video
    # (the container defaults to mp4 unless 'reencoded_extension' is set
    # elsewhere; without an extension ffmpeg cannot choose an output format)
    reencoded_extension="${reencoded_extension:-mp4}"
    REENCODED_VIDEO_PATH="${REENCODED_VIDEOS_SUBDIR}/${filename_no_ext}_RE.${reencoded_extension}"

    ffmpeg -version # print version to logs
    if ffmpeg -y -i "$SAMPLE" \
      -c:v libx264 \
      -pix_fmt yuv420p \
      -preset superfast \
      -crf 15 \
      "$REENCODED_VIDEO_PATH"; then
      echo "Reencoded video: $REENCODED_VIDEO_PATH"
      echo "--------"
      FRAME_EXTRACTION_INPUT_VIDEO="$REENCODED_VIDEO_PATH"
    else
      # no point in extracting frames from a missing/partial video
      echo "Reencoding failed! Please check .err log"
      echo "--------"
      task_status=1
    fi
  else
    echo "Skipping video reencoding..."
    echo "--------"
    FRAME_EXTRACTION_INPUT_VIDEO="$SAMPLE"
  fi
fi

# Run frame extraction algorithm on video
if (( task_status == 0 )); then
  # Get extension of input video
  video_filename="$(basename -- "$FRAME_EXTRACTION_INPUT_VIDEO")"
  VIDEO_EXT="${video_filename##*.}"

  if python "$SCRIPT_DIR/extract_frames_to_label_w_sleap.py" \
    "$FRAME_EXTRACTION_INPUT_VIDEO" \
    --output_path "$OUTPUT_DIR" \
    --output_subdir "$OUTPUT_SUBDIR" \
    --video_extensions "$VIDEO_EXT" \
    --initial_samples "$PARAM_INI_SAMPLES" \
    --scale "$PARAM_SCALE" \
    --n_components "$PARAM_N_COMPONENTS" \
    --n_clusters "$PARAM_N_CLUSTERS" \
    --per_cluster "$PARAM_PER_CLUSTER" \
    --compute_features_per_video; then
    echo "Frames extracted from video: $FRAME_EXTRACTION_INPUT_VIDEO"
  else
    echo "Frame extraction failed! Please check .err log"
    task_status=1
  fi
  echo "--------"
fi

# Reencoded videos log
# copy .err and .out files to go with reencoded video too if required
# filename: {reencoded video name}.slurm_array.{slurm_array_job_id}-{slurm_array_task_id}.{err|out}
# TODO: make a nicer log
if [[ "$flag_reencode_input_videos" == true ]]; then
  for ext in err out; do
    cp "${slurm_log_prefix}.${ext}" \
      "${REENCODED_VIDEOS_SUBDIR}/${filename_no_ext}_RE.slurm_array.${SLURM_ARRAY_JOB_ID}-${SLURM_ARRAY_TASK_ID}.${ext}"
  done
fi

# Frame extraction logs
# Move logs for this job to subdir with extracted frames
mv "${slurm_log_prefix}".{err,out} "${LOG_DIR}" || task_status=1

# Report failure of any step to SLURM
exit "$task_status"
File renamed without changes.
6 changes: 2 additions & 4 deletions crabs/bboxes_labelling/extract_frames_to_label_w_sleap.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,12 +325,10 @@ def extract_frames_to_label_from_video(
raise KeyError(msg)

# If read successfully: save to file
# file naming format: parentdir_videoname_frame_XXX.png
# file naming format: videoname_frame_XXX.png
else:
file_path = video_output_dir / Path(
f"{Path(vid_str).parent.stem}_"
f"{Path(vid_str).stem}_"
f"frame_{frame_idx:06d}.png",
f"{Path(vid_str).stem}_" f"frame_{frame_idx:08d}.png",
)
img_saved = cv2.imwrite(str(file_path), frame)
if img_saved:
Expand Down

0 comments on commit aa2fca4

Please sign in to comment.