-
Notifications
You must be signed in to change notification settings - Fork 16
/
find-duplicate-files.sh
executable file
·131 lines (103 loc) · 2.27 KB
/
find-duplicate-files.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/bin/bash
# Find duplicate files by MD5 hash (the size-based pre-filter is currently commented out below)
exit_script()
{
    # Terminates the script, optionally printing a message first.
    #
    # Arguments:
    #   $1   - optional exit code (0-255). When the first argument is not a
    #          valid exit code it is treated as part of the message and the
    #          exit code defaults to 1.
    #   $... - optional message. Printed as "INFO: ..." on stdout when the
    #          exit code is 0, otherwise as "ERROR: ..." on stderr.
    # Outputs:
    #   For a non-zero exit code, "Aborting script..." is written to stderr
    #   so that diagnostics never mix with data on stdout.
    local exit_code=1
    local code_re='^([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])$'
    local text_re='[[:alnum:]]'

    # The builtin regex match anchors on the whole string, so a multi-line
    # message containing a numeric line is not mistaken for an exit code,
    # and arguments such as "-n" are not swallowed the way echo would.
    if [[ "${1-}" =~ $code_re ]]; then
        exit_code=$1
        shift
    fi

    # Only print a message when there is something readable in it.
    if [[ "$*" =~ $text_re ]]; then
        if [ "$exit_code" -eq 0 ]; then
            printf 'INFO: %s\n' "$*"
        else
            printf 'ERROR: %s\n' "$*" >&2
        fi
    fi

    # Print 'aborting' string if exit code is not 0
    if [ "$exit_code" -ne 0 ]; then
        echo "Aborting script..." >&2
    fi
    exit "$exit_code"
}
usage()
{
    # Prints the usage text on stdout, then exits through exit_script.
    #
    # Arguments:
    #   $@ - forwarded unchanged to exit_script (optional exit code and/or
    #        message).
    #
    # The script name is expanded directly inside the here-document rather
    # than spliced into a sed replacement, so names containing characters
    # that are special to sed ('|', '&', '\') cannot corrupt the output.
    local script_name="${0##*/}"

    cat << EOF
USAGE
This script scans for duplicate files.
SYNTAX
${script_name} [OPTIONS] ARGUMENTS
ARGUMENTS
directory The starting directory to search from.
OPTIONS
-v, --verbose Make the script more verbose.
-h, --help Prints this usage.
EOF
    exit_script "$@"
}
test_arg()
{
    # Used to validate user input.
    #
    # Arguments:
    #   $1 - the option (or positional argument) being checked
    #   $2 - the value supplied for that option; may be empty or omitted
    #
    # Calls usage with an error message when:
    #   * no value was supplied and $1 itself looks like an option, or
    #   * the supplied value starts with '-'.
    #
    # Builtin pattern matching is used instead of `echo ... | grep` because
    # echo treats values such as "-n" or "-e" as its own options and prints
    # nothing, which let those values slip through the check.
    local arg="${1-}"
    local argv="${2-}"

    if [[ -z "$argv" && "$arg" == -* ]]; then
        usage "Null argument supplied for option $arg"
    fi
    if [[ "$argv" == -* ]]; then
        usage "Argument for option $arg cannot start with '-'"
    fi
}
test_path()
{
    # Validates the directory argument.
    #
    # Arguments:
    #   $1 - candidate directory path
    #
    # The value is first run through test_arg; if it then does not name an
    # existing directory, usage is called with an error message.
    local candidate="$1"

    test_arg "$candidate"

    # Nothing more to do when the path is an existing directory.
    [[ -d "$candidate" ]] && return 0

    usage "Specified directory does not exist."
}
# Directory to scan (set from the command line) and verbosity level.
SEARCH_DIR=""
VERBOSITY=0

# process arguments; at least one argument is required
[ $# -gt 0 ] || usage
while [ $# -gt 0 ]; do
    case "$1" in
        -h|--help)
            # An explicit help request is not an error: exit with status 0
            # instead of the default failure status.
            usage 0
            ;;
        -v|--verbose)
            # Plain arithmetic assignment; ((VERBOSITY++)) returns a
            # failure status on the first increment.
            VERBOSITY=$((VERBOSITY + 1))
            shift
            ;;
        *)
            # Anything else is taken as the directory to scan. When several
            # are given, the last one wins.
            test_path "$1"
            SEARCH_DIR=$(readlink -m -- "$1")
            shift
            ;;
    esac
done

if [ -z "${SEARCH_DIR}" ]; then
    usage
fi

if [ "$VERBOSITY" -gt 0 ]; then
    echo >&2 "Scanning for duplicate files in $SEARCH_DIR ..."
fi

# Hash every regular file, sort so identical digests become adjacent, then
# print only the groups whose first 33 characters (the 32-character MD5
# digest plus the separator that follows it) repeat. Groups are separated
# by a blank line. '+' batches many files per md5sum invocation instead of
# forking once per file.
find "$SEARCH_DIR" -type f -exec md5sum '{}' + \
    | sort \
    | uniq --all-repeated=separate -w 33

# Disabled alternative: pre-filter by file size before hashing.
#find "$SEARCH_DIR" -not -empty -type f -printf "%s\n" \
# | sort -rn \
# | uniq -d \
# | xargs -I{} -n1 find -type f -size {}c -print0 \
# | xargs -0 md5sum \
# | sort \
# | uniq -w32 --all-repeated=separate

if [ "$VERBOSITY" -gt 0 ]; then
    echo >&2 "Finished."
fi

exit_script 0