#! /usr/bin/env bash
#
# Usage:  btest-diff [options] <filename>
#
# These environment variables are set by btest:
#   TEST_MODE={TEST|UPDATE|UPDATE_INTERACTIVE}
#   TEST_BASELINE
#   TEST_DIAGNOSTICS
#   TEST_NAME
#
# A test can optionally set these environment variables:
#   TEST_DIFF_CANONIFIER
#   TEST_DIFF_BRIEF
#   TEST_DIFF_FILE_MAX_LINES
#
# This script has the following exit codes:
#
# When TEST_MODE is TEST:
# 0 - Comparison succeeded, files are the same
# 1 - Problems with input file/args or running TEST_DIFF_CANONIFIER, or file contents differ
# 2 - Other diffing trouble (inherited from diff)
# 100 - No baseline available to compare against
#
# When TEST_MODE is UPDATE:
# 0 - Baseline updated
# 1 - Problems with input file/args or running TEST_DIFF_CANONIFIER
#
# When TEST_MODE is UPDATE_INTERACTIVE:
# 0 - Baseline updated, or nothing to update
# 1 - Problems with input file/args or running TEST_DIFF_CANONIFIER, or user skips a deviating baseline
# 200 - User asks to abort after a deviating baseline
#
# Otherwise: exits with 1

# It's okay to check $? explicitly:
# shellcheck disable=SC2181

# Default cap on how many lines of a mismatching input file are echoed
# into the diagnostics.
MAX_LINES=100

# First line of every baseline that btest-diff writes. It warns people
# off editing the file by hand and, at the same time, marks the baseline
# as already having been passed through TEST_DIFF_CANONIFIER (if any).
HEADER='### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.'

# Binary mode lets btest-diff handle files that are better treated as
# opaque blobs than as text. While it is active the input is taken
# as-is:
#
# - input and baseline are only checked for being identical
# - no btest header line is prepended when a baseline is updated
# - no canonification takes place when a baseline is updated
#
# Any non-empty value switches binary mode on.
BINARY_MODE=""

# Predicate, succeeds if binary mode is switched on.
is_binary_mode() {
    [[ -n "$BINARY_MODE" ]]
}

# Predicate, succeeds if the given baseline file is canonicalized,
# i.e., if its first line carries our header.
#
# Arguments: $1 - path of the baseline file to inspect
# Globals:   HEADER (read)
# Returns:   0 if the header is found in the first line, 1 otherwise
is_canon_baseline() {
    local input="$1"
    local marker="Do not edit."
    local header="$HEADER"

    # Only the part of the header up to and including the first
    # "Do not edit." sentence is significant. Whatever follows may be
    # reworded in future versions without invalidating old baselines.
    if [[ "$header" == *"$marker"* ]]; then
        header=${header%%"$marker"*}$marker
    fi

    head -n 1 "$input" | grep -q -F "$header" 2>/dev/null || return 1
    return 0
}

# Writes the content of the requested baseline to standard out.
#
# In binary mode the file is emitted unchanged. Otherwise the baseline
# must be canonicalized (see is_canon_baseline); its header line is
# stripped and the remainder is printed. For a non-canonicalized
# baseline nothing is printed and the function fails.
#
# Arguments: $1 - path of the baseline file
get_baseline() {
    local input="$1"

    if ! is_binary_mode; then
        is_canon_baseline "$input" || return 1

        # Skip the header line, emit everything after it.
        tail -n +2 "$input"
        return
    fi

    cat "$input"
    return 0
}

# Updates the given baseline to the given filename inside the *first*
# baseline directory. Prepends our header if we're not in binary mode.
#
# Arguments: $1 - file holding the new baseline content
#            $2 - name of the baseline file inside the baseline directory
# Globals:   baseline_dirs (read), HEADER (read)
# Returns:   0 if the baseline was written; non-zero if no baseline
#            directory is configured or writing the file failed
update_baseline() {
    local input="$1"
    local dir="${baseline_dirs[0]}"
    local output

    # Without a baseline directory the target would collapse to "/$2",
    # i.e. a file in the filesystem root. Refuse instead of writing there.
    if [ -z "$dir" ]; then
        echo "btest-diff: no baseline directory configured, cannot update baseline '$2'" >&2
        return 1
    fi

    output="$dir/$2"

    if is_binary_mode; then
        cat "$input" >"$output"
    else
        # Single redirection: the target is opened once, and header and
        # content are written through the same descriptor.
        {
            printf '%s\n' "$HEADER"
            cat "$input"
        } >"$output"
    fi
}

# ---- Main program ----------------------------------------------------

# Consume leading options. "--binary" (possibly repeated) switches on
# binary mode. Parsing stops at the first argument that is empty or is
# not a known option, which is left in place as $1.
while [[ "$1" == "--binary" ]]; do
    BINARY_MODE=1
    shift
done

# A test may override how many lines of the input file are shown in the
# diagnostics.
if [ -n "$TEST_DIFF_FILE_MAX_LINES" ]; then
    MAX_LINES=$TEST_DIFF_FILE_MAX_LINES
fi

# Without a diagnostics destination, report on standard output.
if [ -z "$TEST_DIAGNOSTICS" ]; then
    TEST_DIAGNOSTICS=/dev/stdout
fi

# TEST_DIAGNOSTICS is quoted in every redirection: an unquoted path
# containing whitespace makes bash fail with "ambiguous redirect" and
# the message would be lost.
if [ "$#" -lt 1 ]; then
    echo "btest-diff: wrong number of arguments" >"$TEST_DIAGNOSTICS"
    exit 1
fi

# Split string with baseline directories into array.
IFS=':' read -ra baseline_dirs <<<"$TEST_BASELINE"

# The first argument is the file to compare. Its baseline name is the
# same path with every "/" replaced by ".". Remaining arguments stay in
# "$@" after the shift.
input="$1"
canon=${input//\//.}
shift

if [ ! -f "$input" ]; then
    echo "btest-diff: input $input does not exist." >"$TEST_DIAGNOSTICS"
    exit 1
fi

# Whitespace-separated list of temporary files to remove when the
# script exits. Sections that create temporary files append to it.
tmpfiles=

# Removes every file currently listed in $tmpfiles. Errors from rm are
# suppressed.
delete_tmps() {
    # Word splitting of the list is intentional: it is a plain string,
    # not an array.
    # shellcheck disable=SC2206
    local -a doomed=($tmpfiles)

    rm -f "${doomed[@]}" 2>/dev/null
}

# Run the cleanup on every exit path of the script.
trap delete_tmps EXIT

# First available baseline across directories.
baseline=""
for dir in "${baseline_dirs[@]}"; do
    if [ -f "$dir/$canon" ]; then
        baseline="$dir/$canon"
        break
    fi
done

# Outcome reported in TEST mode unless a later step overwrites it.
result=2

# Start from a fresh diagnostics file. Paths below /dev (such as
# /dev/stdout) are never unlinked: removing them cannot reset anything
# and, when running with sufficient privileges, would delete the device
# entry itself.
case "$TEST_DIAGNOSTICS" in
    /dev/*) ;;
    *) rm -f -- "$TEST_DIAGNOSTICS" 2>/dev/null ;;
esac

# TEST_DIAGNOSTICS is quoted in every redirection so that paths
# containing whitespace do not trigger an "ambiguous redirect".
echo "== File ===============================" >>"$TEST_DIAGNOSTICS"

if [ -z "$baseline" ]; then
    # No baseline yet: always show the complete input.
    cat "$input" >>"$TEST_DIAGNOSTICS"
elif [ -n "$TEST_DIFF_BRIEF" ]; then
    echo "<Content not shown>" >>"$TEST_DIAGNOSTICS"
else
    # Show the input, truncated to MAX_LINES lines if it is longer.
    if [ "$(wc -l "$input" | awk '{print $1}')" -le "$MAX_LINES" ]; then
        cat "$input" >>"$TEST_DIAGNOSTICS"
    else
        head -n "$MAX_LINES" "$input" >>"$TEST_DIAGNOSTICS"
        echo "[... File too long, truncated ...]" >>"$TEST_DIAGNOSTICS"
    fi
fi

# If no canonifier is defined, just copy. Simplifies code layout.
# In binary mode, always just copy.
if [ -z "$TEST_DIFF_CANONIFIER" ] || is_binary_mode; then
    TEST_DIFF_CANONIFIER="cat"
fi

# Temporary files are created with mktemp rather than under a
# predictable name. The baseline name is kept in the file name for
# readable diff headers, but reduced to characters that survive the
# whitespace-separated $tmpfiles list.
tmp_stem="/tmp/test-diff.$$.${canon//[^A-Za-z0-9._-]/_}"

if ! canon_output=$(mktemp "$tmp_stem.tmp.XXXXXX"); then
    echo "== Error ==============================" >>"$TEST_DIAGNOSTICS"
    echo "btest-diff: cannot create temporary file" >>"$TEST_DIAGNOSTICS"
    exit 1
fi
tmpfiles="$tmpfiles $canon_output"
error=0

# Canonicalize the new test output. The canonifier is a shell command
# line and therefore goes through eval. The file name is handed over as
# the literal text "$input" so that it is expanded inside eval as one
# word instead of being re-parsed as shell code.
# shellcheck disable=SC2016,SC2094
if ! eval "$TEST_DIFF_CANONIFIER" '"$input"' <"$input" >"$canon_output"; then
    echo "== Error ==============================" >>"$TEST_DIAGNOSTICS"
    echo "btest-diff: TEST_DIFF_CANONIFIER failed on file '$input'" >>"$TEST_DIAGNOSTICS"
    error=1
    result=1
fi

if [ -n "$baseline" ]; then
    if ! canon_baseline=$(mktemp "$tmp_stem.baseline.tmp.XXXXXX"); then
        echo "== Error ==============================" >>"$TEST_DIAGNOSTICS"
        echo "btest-diff: cannot create temporary file" >>"$TEST_DIAGNOSTICS"
        exit 1
    fi
    tmpfiles="$tmpfiles $canon_baseline"

    # Prepare the baseline. When created by a recent btest-diff, we
    # don't need to re-canonicalize, otherwise we do.
    if ! get_baseline "$baseline" >"$canon_baseline"; then
        # It's an older uncanonicalized baseline, so canonicalize
        # it now prior to comparison. Future updates via btest
        # -U/-u will then store it canonicalized.
        # shellcheck disable=SC2016,SC2094
        if ! eval "$TEST_DIFF_CANONIFIER" '"$baseline"' <"$baseline" >"$canon_baseline"; then
            echo "== Error ==============================" >>"$TEST_DIAGNOSTICS"
            echo "btest-diff: TEST_DIFF_CANONIFIER failed on file '$baseline'" >>"$TEST_DIAGNOSTICS"
            error=1
            result=1
        fi
    fi

    if [ "$error" -eq 0 ]; then
        echo "== Diff ===============================" >>"$TEST_DIAGNOSTICS"
        # Any arguments left in "$@" are passed on to diff as options.
        if is_binary_mode; then
            diff -s "$@" "$canon_baseline" "$canon_output" >>"$TEST_DIAGNOSTICS"
        else
            diff -au "$@" "$canon_baseline" "$canon_output" >>"$TEST_DIAGNOSTICS"
        fi
        # Exit status of the if statement, i.e. of whichever diff ran.
        result=$?
    fi
elif [ "$TEST_MODE" = "TEST" ]; then
    echo "== Error ==============================" >>"$TEST_DIAGNOSTICS"
    echo "test-diff: no baseline found." >>"$TEST_DIAGNOSTICS"
    result=100
fi

# Close the diagnostics report. TEST_DIAGNOSTICS is quoted so that paths
# containing whitespace do not trigger an "ambiguous redirect".
echo "=======================================" >>"$TEST_DIAGNOSTICS"

if [ "$TEST_MODE" = "TEST" ]; then
    # Plain test run: report the comparison outcome.
    exit "$result"

elif [ "$TEST_MODE" = "UPDATE_INTERACTIVE" ]; then

    # We had a problem running the canonifier
    if [ "$error" != 0 ]; then
        exit 1
    fi

    # There's no change to the baseline, so skip user interaction
    if [ "$result" = 0 ]; then
        exit 0
    fi

    # Let the user decide. Status 0 means "update"; any other status is
    # passed on as our exit code.
    btest-ask-update
    rc=$?

    echo -n "$TEST_NAME ..." >/dev/tty

    if [ "$rc" = 0 ]; then
        # Do not claim success if the baseline could not be written.
        update_baseline "$canon_output" "$canon" || exit 1
        exit 0
    fi

    exit "$rc"

elif [ "$TEST_MODE" = "UPDATE" ]; then

    # We had a problem running the canonifier
    if [ "$error" != 0 ]; then
        exit 1
    fi

    # Do not claim success if the baseline could not be written.
    update_baseline "$canon_output" "$canon" || exit 1
    exit 0
fi

echo "test-diff: unknown test mode $TEST_MODE" >"$TEST_DIAGNOSTICS"
exit 1
