summary refs log tree commit diff
path: root/iedupes.sh
diff options
context:
space:
mode:
Diffstat (limited to 'iedupes.sh')
-rwxr-xr-x iedupes.sh 135
1 files changed, 135 insertions, 0 deletions
diff --git a/iedupes.sh b/iedupes.sh
new file mode 100755
index 0000000..f70e593
--- /dev/null
+++ b/iedupes.sh
@@ -0,0 +1,135 @@
+#!/bin/bash
+
+
+# Signal handling: on Ctrl-C (SIGINT) or SIGTERM, ctrl_c() re-sorts the
+# exceptions file, removes the scratch files and aborts the script.
+
+#######################################
+# Abort handler for the duplicate review.
+# Globals (read when the signal arrives, so they may still be unset or empty
+# if that happens during start-up):
+#   exceptionsfile           - exceptions list; its first line is a header
+#   sorted_exceptionsfile    - scratch file for the sorted copy
+#   optimized_exceptionsfile - scratch file of optimize_exceptions, may be ""
+# Outputs: abort notice on stdout.
+# Exits:   255 (the status bash reported for the former out-of-range
+#          `exit -1`).
+#######################################
+function ctrl_c() {
+  echo "ABORTING DUPE REVIEW"
+
+  # Keep the header line on top and sort/de-duplicate everything below it.
+  # The sorted copy replaces the original only when it was written
+  # successfully, so an unreadable exceptions file or a missing scratch file
+  # can no longer wipe the list.
+  if [[ -f "${exceptionsfile:-}" && -n "${sorted_exceptionsfile:-}" ]]; then
+    (read -r; printf "%s\n" "$REPLY"; sort -u) \
+      < "$exceptionsfile" > "$sorted_exceptionsfile" \
+      && mv -f -- "$sorted_exceptionsfile" "$exceptionsfile"
+  fi
+
+  # Remove whatever scratch files are still around.
+  [[ -n "${sorted_exceptionsfile:-}" ]] && rm -f -- "$sorted_exceptionsfile"
+  [[ -n "${optimized_exceptionsfile:-}" ]] && rm -f -- "$optimized_exceptionsfile"
+
+  exit 255
+}
+
+trap ctrl_c SIGINT SIGTERM
+
+
+# --- Paths -------------------------------------------------------------------
+# Directory that gets scanned: wherever the script was started from.
+IE=$(pwd)
+# Exceptions list, kept inside the scanned directory.
+exceptionsfile="$IE/exceptions.txt"
+# Scratch copy used when the exceptions list is re-sorted.
+sorted_exceptionsfile=$(mktemp /tmp/sorted_exceptions_XXXXXXXX.txt)
+# Filled in by optimize_exceptions once it creates its own scratch file.
+optimized_exceptionsfile=""
+
+# --- Settings ----------------------------------------------------------------
+# Minimum number of days between two optimization passes.
+optimization_period=7
+# Today, expressed as whole days since the Unix epoch.
+optimized_curr=$(( $(date +%s) / 86400 ))
+# Switched to true by -y: exceptions are added without any prompt.
+skip_flag=false
+# Similarity preset used when none is entered.
+DEFAULT_SIMILARITY=Small
+
+# Parse command-line options.
+#   -h  print usage and exit
+#   -y  add every reported match to the exceptions file without prompting
+# The leading ':' in the optstring puts getopts into silent mode; only then is
+# the offending option letter available in OPTARG for the error message below.
+while getopts ":hy" opt; do
+  case "$opt" in
+    h)
+      echo "usage: $0 [-h help] [-y automatically add exceptions]"
+      exit 0
+      ;;
+    y)
+      skip_flag=true
+      ;;
+    \?)
+      # Escaped so it matches a literal '?' instead of acting as a glob.
+      echo "error: option -$OPTARG is not implemented" >&2
+      exit 2
+      ;;
+  esac
+done
+
+# --- Start-up checks ---------------------------------------------------------
+
+# Create the exceptions file with its header line when it does not exist yet.
+if [[ ! -e "$exceptionsfile" ]]; then
+  echo "##### LAST TIMESTAMP OF EXCEPTION FILE OPTIMIZATION (IN DAYS SINCE UNIX EPOCH): ${optimized_curr} #####" > "$exceptionsfile" \
+    || { echo "ERROR: Cannot create $exceptionsfile" >&2; exit 1; }
+fi
+
+# New entries are appended to the file later on, so it has to be writable.
+if [[ ! -w "$exceptionsfile" ]]; then
+  echo "ERROR: Cannot write to $exceptionsfile" >&2
+  exit 1
+fi
+
+# The header line stores the day (since the Unix epoch) of the last
+# optimization; take the first number found on it.
+optimized_last="$(head -n 1 "$exceptionsfile" \
+  | grep -E "##### LAST TIMESTAMP OF EXCEPTION FILE.*#####" \
+  | grep -oE "[0-9]+" \
+  | head -n 1)"
+if [[ -z "$optimized_last" ]]; then
+  echo "ERROR: Cannot parse last optimization date of $exceptionsfile" >&2
+  exit 1
+fi
+
+# File names are turned into regular expressions further down, so the script
+# refuses to run when a path below the current directory matches the bracket
+# expression (brackets, parentheses, braces, '*', '+', '?').
+# -print -quit stops the search at the first offending path.
+if [[ -n $(find "$(pwd)" -regextype awk -iregex ".*[\[\]\(\)\{\}\*\+\?]+.*" -print -quit) ]]; then
+  echo -e "REGEX SYMBOLS FOUND IN FILENAMES, IE IN SHAMBLES. ABORTING.\n" >&2
+  # 255 is what bash reported for the former out-of-range `exit -1`.
+  exit 255
+fi
+
+#######################################
+# Prune the exceptions file: every entry whose pattern also matches another
+# line of the file (a duplicate, or a longer entry listing the same paths in
+# the same order) is dropped, then the user is asked whether the pruned
+# version should replace the file.
+# Globals:
+#   optimized_curr, optimized_last, optimization_period (read)
+#   exceptionsfile (read; overwritten when the user confirms)
+#   optimized_exceptionsfile (written; kept global so ctrl_c can remove the
+#     scratch file, reset to "" once the file is gone)
+# Inputs:  y/n answer read from stdin.
+# Outputs: prompt and status messages.
+# Returns: 1 when the last optimization is less than optimization_period days
+#          ago or when nothing could be pruned, 0 otherwise.
+#######################################
+function optimize_exceptions {
+  local optimized_date_rel line lineexpr yn
+  local exceptions_lines optimized_exceptions_lines optimization_diff
+
+  optimized_date_rel=$(( optimized_curr - optimized_last ))
+  [[ $optimized_date_rel -ge $optimization_period ]] || return 1
+
+  # Created only after the period check, so the early return above leaves no
+  # scratch file behind.
+  optimized_exceptionsfile="$( mktemp /tmp/optimized_exceptions_XXXXXXXX.txt )"
+
+  echo "##### LAST TIMESTAMP OF EXCEPTION FILE OPTIMIZATION (IN DAYS SINCE UNIX EPOCH): ${optimized_curr} #####" > "${optimized_exceptionsfile}"
+
+  # IFS= and -r keep every line exactly as stored (no trimming, no backslash
+  # processing).
+  while IFS= read -r line; do
+    # The old header is skipped; a fresh one was written above.
+    if grep -qE "##### LAST TIMESTAMP OF EXCEPTION FILE.*#####" <<< "$line"; then
+      continue
+    fi
+
+    # "/a /b" becomes ".*/a.*/b.*": matches any line containing these paths in
+    # this order.
+    lineexpr=$(printf '%s\n' "$line" | sed -r "s| \/|\.*\/|g; s|^|\.*|g; s|$|\.*|g")
+
+    # Every entry matches itself once; a second hit means the entry is
+    # redundant.
+    if [[ $(grep -a -c -- "$lineexpr" "$exceptionsfile") -ge 2 ]]; then
+      continue
+    fi
+
+    printf '%s\n' "$line" >> "${optimized_exceptionsfile}"
+  done < "$exceptionsfile"
+
+  exceptions_lines=$(wc -l < "$exceptionsfile")
+  optimized_exceptions_lines=$(wc -l < "${optimized_exceptionsfile}")
+  optimization_diff=$(( exceptions_lines - optimized_exceptions_lines ))
+  if [[ $optimization_diff -le 0 ]]; then
+    # Nothing to prune: discard the scratch file before leaving.
+    rm -f -- "${optimized_exceptionsfile}"
+    optimized_exceptionsfile=""
+    return 1
+  fi
+
+  while true; do
+    if ! read -r -p "Exceptions file (${exceptionsfile}) has been optimized (${optimization_diff} new optimizations, ${optimized_date_rel} days since last optimization). Would you like to overwrite the existing exceptions file (y/n)? " yn; then
+      # stdin is closed: treat it as "no" instead of prompting forever.
+      echo "Update aborted."
+      break
+    fi
+
+    case "${yn,,}" in
+      y|yes)
+        # Overwrite in place; the old list is never deleted before the new one
+        # has been copied.
+        cp -f -- "${optimized_exceptionsfile}" "$exceptionsfile" && echo "Exceptions file updated."
+        break
+        ;;
+      n|no)
+        echo "Update aborted."
+        break
+        ;;
+      *)
+        echo "Invalid response."
+        ;;
+    esac
+  done
+
+  rm -f -- "${optimized_exceptionsfile}"
+  optimized_exceptionsfile=""
+}
+
+# The exceptions file is only optimized in interactive runs (without -y).
+$skip_flag || optimize_exceptions
+
+# Accepted similarity presets. The alternatives are grouped so that ^ and $
+# anchor every one of them, not just the first and the last.
+re='^(Minimal|VerySmall|Small|Medium|High|VeryHigh)$'
+
+declare SIMILARITY
+
+if $skip_flag ; then
+  # Non-interactive run: use the default preset.
+  SIMILARITY="$DEFAULT_SIMILARITY"
+else
+  # Ask until a valid preset is entered; an empty answer selects the default.
+  while read -r -p "Enter similarity preset [Default: Small; Allowed: Minimal, VerySmall, Small, Medium, High, VeryHigh]: " SIMILARITY; do
+    SIMILARITY=${SIMILARITY:-"$DEFAULT_SIMILARITY"}
+
+    if [[ $SIMILARITY =~ $re ]]; then
+      break
+    fi
+
+    echo "error: Invalid input" >&2
+  done
+
+  # The loop also ends when stdin is closed; fall back to the default rather
+  # than continuing with an empty or invalid preset.
+  [[ $SIMILARITY =~ $re ]] || SIMILARITY="$DEFAULT_SIMILARITY"
+fi
+
+
+# Run the image scan on $IE and read its cleaned-up report into the array
+# "matches", one element per output line.
+#   sed  - removes the " - WxH - size - label" tail of a line, the
+#          "N images which have similar friends" text, double quotes, a
+#          leading "Found " and the message/status lines
+#   head - drops the last five lines of the report
+#   awk  - reads blank-line separated paragraphs (RS=), joins each one into a
+#          single space-separated line and puts blank lines around every
+#          paragraph that contains no '/'
+mapfile -t matches < <(
+  czkawka-cli image -d "$IE" -s "$SIMILARITY" \
+    | sed -r \
+        -e "s| \- [0-9]*x[0-9]* \- [0-9]*(\.[0-9]*)? [KMGTP]iB \- [a-zA-Z ]*$||g" \
+        -e "s|[0-9]+ images which have similar friends||g" \
+        -e "s|\"||g" \
+        -e "s|^Found ||g" \
+        -e "s|.*-------MESSAGES-------.*||g" \
+        -e "s|.*Properly loaded.*||g" \
+        -e "s|.*Properly saved to file.*||g" \
+    | head -n -5 \
+    | awk -v RS= '{$1=$1; t=!/\//; if(NR>1 && t) print ""; print; if(t) print ""}'
+)
+
+# Put the paths of every entry into sorted order: split at " /", sort the
+# pieces and join them again with single spaces.
+for idx in "${!matches[@]}"; do
+  matches[idx]=$( echo "${matches[idx]}" | sed -r "s|\ \/|\n\/|g" | sort | tr '\n' ' ' | sed -r "s| $|\n|" )
+done
+
+
+# Go through every reported group of similar images. Groups that are already
+# covered by the exceptions file are skipped; the others are either added to
+# it right away (-y) or shown in feh for a manual decision.
+printf '%s\n' "${matches[@]}" | while IFS= read -r line; do
+  # "/a /b" becomes ".*/a.*/b.*": matches any exceptions entry containing
+  # these paths in this order. printf keeps backslashes in paths untouched.
+  lineexpr=$(printf '%s\n' "$line" | sed -r "s| \/|\.*\/|g; s|^|\.*|g; s|$|\.*|g")
+
+  # Already listed as an exception: nothing to review.
+  if grep -q -a -E -- "$lineexpr" "$exceptionsfile"; then
+    continue
+  fi
+  printf '%s\n' "$lineexpr"
+
+  if $skip_flag ; then
+    printf '%s\n' "$line" >> "$exceptionsfile"
+  else
+    # One path per line for feh's file list on stdin. The split happens at
+    # " /", the same rule used when the groups were sorted, so it also works
+    # for directories outside /home.
+    #   action1 - send the current image to the trash (gio trash)
+    #   action2 - append the sorted file list as one line to the exceptions
+    #             file
+    #   action3 - run the external feh_action3.sh helper (not part of this
+    #             file)
+    printf '%s\n' "$line" | sed -r "s|\ \/|\n\/|g" \
+      | /bin/feh --info "printf '%S %wx%h'" --zoom max --scale-down -g 1280x720 -B black -d \
+          --action1 "gio trash %F" \
+          --action2 "cat %L | sed -r 's|^\.|$(pwd)|g' | sort | tr '\n' ' ' | paste -s -d ' ' | sed 's| $||g' >> \"$exceptionsfile\"" \
+          --action3 "$HOME/stuf/scripts/scriptlets/feh_action3.sh %F %L" \
+          -f -
+  fi
+done
+
+# Final tidy-up: keep the header line on top and sort/de-duplicate the rest.
+# The sorted copy replaces the exceptions file only when it was written
+# successfully, so a failed read or sort cannot clobber the list.
+(read -r; printf "%s\n" "$REPLY"; sort -u) < "$exceptionsfile" > "${sorted_exceptionsfile}" \
+  && mv -f -- "${sorted_exceptionsfile}" "$exceptionsfile"
+
+# EVERYTHING IS HOW IT'S SUPPOSED TO BE, THERE CAN BE NO IMPROVEMENTS, NOTHING ELSE WORKED
+# | sed -e "s|\(\/.*\)|\"\1\"|g"
+# ; s|\ |\\\ |g
+
+#THIS IS BEST PRACTICE FOR ENVIRONMENT VARIABLES, CONSIDER CORRECTING ~/stuf STUFF
+# -t 95% WORKS FOR ELIMINATING THE CHAFF, BUT IT MIGHT MISS SOME
+
+#feh . --action1 "cat %L | sed -r 's|^\.|$(pwd)|g' | sort | tr '\n' ' ' "
+#czkawka_cli image -d "/home/jay/Tempsktop" -s "High" | sed -r "s| \- [0-9]*x[0-9]* \- [0-9]*\.[0-9]* [KMGTP]iB \- [a-zA-Z ]*$||g ; s|Found [0-9]+ images which have similar friends||g " | head -n -5 | awk -v RS= '{$1=$1; t=!/\//; if(NR>1 && t) print ""; print; if(t) print ""}' | xargs -n 1| sort | xargs
+
+
+# IF IT WORKS WITH QUOTES, ADD QUOTES
+