# Determine the snapshot date to use.

# Though written in the form of a function definition, phase B is not
# executed here; it is called later, during the loop that drives phases C and D.
#
# When called, the function sets these shell variables for external use:
#
#     SNAPSHOT_SSDATE usable in a URL at snapshot.debian.org
#     SNAPSHOT_D2DATE the same date, but in date(1)-compatible format
#
# The function also leaves these files in $TMPDIR/:
#
#     snapshot.stamp    the snapshot date again, but as a timestamp
#     phb-release.stamp for later comparison against release.stamp
#     phb-html-parses/* (possibly interesting)
#     phb-internal/*    (probably uninteresting)
#
# The variable DIST must already be set before the function is called.
# Also, the html file $TMPDIR/mirror-checks/$SNAPSHOT_TOC1 must already
# have been fetched.
#
# Various development notes follow.
#
# TRIPLICATION OF THE SNAPSHOT DATE
#
# Are three distinct means of returning a single snapshot date overkill?
# Answer: perhaps, but the triplicate results from the manner in which this
# program has evolved. The triplicate could be reduced/combined with modest
# programming effort if future development revealed that the triplicate
# posed a practical problem, but reduction/combination seems unnecessary at
# the moment.
#
# Conceptually, determining the right snapshot date should be pretty easy.
# A sufficiently experienced Debian user could do it manually in a few
# minutes by following a three-line instruction in English: "Find the
# latest backports release available on snapshot that precedes or equals
# the regular release. If two snapshots have captured the same release,
# prefer the later snapshot." However, because the task requires the
# parsing of human-readable web pages and for other reasons, teaching a
# shell script to complete the task automatically with a passably useful
# degree of reliability -- without invoking tools like Perl that would make
# it hard for future maintainers unfamiliar with the tools to maintain the
# script -- wants 200 lines or so of code, as follows. And even if Perl
# were used, the code would still not be short.
#
# DUPLICATION OF THE TIMESTAMP; CHOICE OF BASH AS A SCRIPTING LANGUAGE
#
# The phb-release.stamp is arguably unnecessary, since a
# release.stamp (without the phb-), which ought to be identical, should
# already exist. However, Bash as a scripting language is weakly scoped,
# lacks support for lazy instantiation, and has little support for compound
# objects of user-defined type, so the separate phb-release.stamp is kept
# to cause certain, inadvertently introduced potential future bugs to
# manifest themselves in a more obvious way. Without the separate
# phb-release.stamp, the program as presently designed feels too brittle.
#
# It could be argued that the last is sufficient reason to have used
# something other than Bash as a scripting language. Perhaps that argument
# is right, but the program seems not unlikely to require significant
# future maintenance on a two-year cycle (that is, upon each major release
# of Debian), possibly by persons other than the original author. Bash is
# already known to practically all potential future maintainers. Moreover,
# whatever Bash's shortcomings (whether real or perceived), Bash is
# especially suited to moving files around -- which, of course, is what
# this program chiefly does.
#
# Meanwhile, the release.stamp duplication makes comparison of the
# duplicates necessary at the end of phase D, but that's all right. That's
# what catches the bugs. Indeed, it has already caught one, now fixed.
#
# IMPROPER BASH FUNCTIONS
#
# Unlike functions defined in 10function-definitions.bash, this function is
# meant to be called only once during the program's run. It is designed not
# as a proper function but merely as a device to keep the code of phase B
# conceptually separate from that of phases C and D (though in the thread
# of execution, half of each of phase C and phase D actually runs before
# phase B begins).
#
# Archaic programming languages like FORTRAN and BASIC would have called an
# improper function like this one a "subroutine."

# Diagnostic messages of phase B, each looked up once through gettext(1)
# and then frozen. Where a message contains %s placeholders, phase B fills
# them in with printf before handing the message to die.
readonly PHB_MSG10="$(
    gettext 'the list of months given by %s seems to be empty'
)"
readonly PHB_MSG20="$(
    gettext '%s does not cover the month %s of the regular release'
)"
readonly PHB_MSG30="$(
    gettext '%s covers too little before the month %s of the regular release'
)"
readonly PHB_MSG35="$(
    gettext 'snapshot did not respond: unrecoverable'
)"
readonly PHB_MSG40="$(
    gettext '%s has no snapshots during the months near the month of the regular release'
)"
readonly PHB_MSG50="$(
    gettext '%s does not seem to cover the date and time of the regular release'
)"
readonly PHB_MSG60="$(
    gettext '%s seems to have too few snapshots that precede the regular release'
)"
readonly PHB_MSG70="$(
    gettext '%s seems to have captured no backports release that succeeds the regular release'
)"
readonly PHB_MSG80="$(
    gettext 'a least one backport is found that succeeds the regular release, which is good, but none are found that precede or equal it'
)"
readonly PHB_MSG90="$(
    gettext 'an internal glitch regarding indices has arisen while recording the snapshot date'
)"

# Results of phase B, empty until execute_phase_B fills them in.
declare SNAPSHOT_SSDATE='' SNAPSHOT_D2DATE=''
# Phase B: determine the snapshot date to use.
#
# Globals read:    VV TMPDIR TARGET DIST DIST_BACKPORTS SNAPSHOT_TOC1
#                  SNAPSHOT_SERVICE ARCHIVE_NAME N_FLANKING_MONTHS
#                  SNAPSHOT_MARGIN OPT_STAMP STAMPOPT_FILE PHB_MSG*
# Globals written: SNAPSHOT_SSDATE SNAPSHOT_D2DATE (both made readonly)
# Helpers called:  stamp_per_release month_per_stamp
#                  convert_format_ss_to_d2 die (none defined in this file's
#                  visible part; die is assumed to terminate the program)
# Files written:   $TMPDIR/phb-release.stamp, $TMPDIR/snapshot.stamp,
#                  $TMPDIR/phb-html-parses/{months,snapshots}.list,
#                  $TMPDIR/phb-internal/{stamp.stamp,toc2.html,wget-out.txt}
#                  and, if OPT_STAMP is nonzero, $STAMPOPT_FILE
# Returns:         0 on success; every failure path goes through die.
function execute_phase_B {

    # $VV is deliberately left unquoted here and below: presumably it holds
    # an optional verbosity flag that must vanish when empty (TODO confirm).
    mkdir $VV -- "$TMPDIR/phb-internal"

    # Extract the regular release date from the corresponding Release file.
    stamp_per_release "$TMPDIR/phb-release.stamp"\
     "$TARGET/dists/$DIST/Release"
    local PHB_REGULAR_RELEASE_MONTH
    readonly PHB_REGULAR_RELEASE_MONTH="$(
        month_per_stamp "$TMPDIR/phb-release.stamp"
    )"

    # Phase A has already fetched the web
    # page, $TMPDIR/mirror-checks/$SNAPSHOT_TOC1, that lists the years and
    # months. Extract and list the years and months in plain text,
    # as "197102" for Feb. 1971. (The second sed pads a one-digit month
    # with a leading zero.)
    mkdir $VV -- "$TMPDIR/phb-html-parses"
    local PHB_PAT1 PHB_PAT2 PHB_PAT3 PHB_PAT
    readonly PHB_PAT1='^[^<>]*<\s*a\s+href\s*=\s*"[^"]*'
    readonly PHB_PAT2='year=([[:digit:]]+)[^"]*'
    readonly PHB_PAT3='month=([[:digit:]]+)[^"]*"\s*/?\s*>.*$'
    readonly PHB_PAT="${PHB_PAT1}${PHB_PAT2}${PHB_PAT3}"
    sed -rn\
     -e '\%<h1>[^<>]*</h1>%ba;d;:a;n;'\
     -e "s%${PHB_PAT}%\\1\\2%p;"\
     -e 'ba;' --\
     "$TMPDIR/mirror-checks/$SNAPSHOT_TOC1"\
     | sed -r 's%^([[:digit:]]{4})([[:digit:]])$%\10\2%'\
     >"$TMPDIR/phb-html-parses/months.list"

    # Approximate overview of procedure:
    #
    #   * Read TOC2 for several months: $N_FLANKING_MONTHS before, one
    #     during, and $N_FLANKING_MONTHS after the regular release. (If a
    #     month after is unavailable, omit it.)
    #   * Compile a single list of snapshot dates from the months.
    #   * Scanning late to early, identify the latest snapshot that
    #     precedes or equals the regular release. (Die if that's the latest
    #     in the list.)
    #   * Step $SNAPSHOT_MARGIN snapshots older. (This step should be
    #     unnecessary except that snapshot's clock might be imperfectly
    #     synchronized with backports' clock.)
    #   * Starting from there, scanning early to late, find the earliest
    #     snapshot whose backports release succeeds the regular release.
    #     (Die if it's the earliest remaining.)
    #   * Step one snapshot older.
    #

    # In TOC1, index the month of the regular release (or, if that specific
    # month is not in TOC1, then the nearest earlier month). Record the
    # index in PHB_INDEX.
    local PHB_N_MONTHS
    declare -ir PHB_N_MONTHS=$(
        wc -l <"$TMPDIR/phb-html-parses/months.list"
    )
    ((PHB_N_MONTHS > 0)) || {
        die "$(printf "$PHB_MSG10" "$SNAPSHOT_SERVICE")"
    }
    local PHB_INDEX; declare -i PHB_INDEX=$PHB_N_MONTHS
    # Whether to 'declare -i' the next variable to be an integer is a
    # question of scripting/programming judgment. The variable does
    # get *used* at least partly as an integer, at any rate. For now,
    # it remains a string of text whose contents happen to be decimal digits.
    # The same goes for PHB_REGULAR_RELEASE_MONTH above. (The use below
    # of [-le] rather than ((<=)) is more or less arbitrary, just because the
    # variables are strings. The use happens to be POSIX-compliant, but the
    # whole program is so full of Bashisms that POSIX is not really a reason.
    # The maintainer lacks a significant preference. Either would work.)
    local PHB_MONTH1
    for PHB_MONTH1 in $(tac -- "$TMPDIR/phb-html-parses/months.list"); do
        [ "$PHB_MONTH1" -le "$PHB_REGULAR_RELEASE_MONTH" ] && break
        # An assignment is used rather than ((--PHB_INDEX)) because the
        # latter returns a nonzero status when the result is 0, which under
        # errexit or an ERR trap would preempt the diagnostic below.
        PHB_INDEX=$((PHB_INDEX - 1))
        if ((PHB_INDEX < 1)); then
            die "$(printf "$PHB_MSG20" "$SNAPSHOT_SERVICE"\
            "$PHB_REGULAR_RELEASE_MONTH")"
        fi
    done
    ((PHB_INDEX > N_FLANKING_MONTHS)) || {
        die "$(printf "$PHB_MSG30" "$SNAPSHOT_SERVICE"\
        "$PHB_REGULAR_RELEASE_MONTH")"
    }

    # Read the second-level tables of contents (TOC2) of nearby months.
    # Format and store the TOC2 of the nearby months together to the single
    # file $TMPDIR/phb-html-parses/snapshots.list, thereby compiling a
    # list of candidate snapshot dates.
    local PHB_PERIOD_DURATION; declare -i PHB_PERIOD_DURATION
    if ((PHB_INDEX + N_FLANKING_MONTHS <= PHB_N_MONTHS)); then
        PHB_PERIOD_DURATION=$((2*N_FLANKING_MONTHS + 1))
    else
        # Fewer than N_FLANKING_MONTHS months follow: stop at the last one.
        PHB_PERIOD_DURATION=$((
            PHB_N_MONTHS + N_FLANKING_MONTHS + 1 - PHB_INDEX
        ))
    fi
    readonly PHB_PERIOD_DURATION
    touch -- "$TMPDIR/phb-html-parses/snapshots.list"
    local PHB_PATM PHB_HMBS PHB_TOC2_FILE PHB_YEARONLY PHB_MONTHONLY
    readonly PHB_PATM='^[^<>]*<\s*a\s+href\s*=\s*"([^"]+)"\s*/?\s*>.*$'
    readonly PHB_HMBS="https://$SNAPSHOT_SERVICE/$ARCHIVE_NAME"
    readonly PHB_TOC2_FILE="$TMPDIR/phb-internal/toc2.html"
    # (Observe that PHB_MONTH1 has already been locally defined.)
    for PHB_MONTH1 in $(
        head -n"$((PHB_INDEX + N_FLANKING_MONTHS))" --\
        "$TMPDIR/phb-html-parses/months.list"\
        | tail -n"${PHB_PERIOD_DURATION}"
    ); do
        # Split "YYYYMM" into "YYYY" and "MM".
        PHB_YEARONLY="${PHB_MONTH1%[0-9][0-9]}"
        PHB_MONTHONLY="${PHB_MONTH1#[0-9][0-9][0-9][0-9]}"
        # Fetch to a file first, so that a failed fetch is detected (and
        # die is called) in this shell. Were wget run inside a process
        # substitution or a pipeline, die would merely end a subshell and
        # the month would silently be missing from snapshots.list.
        wget -nH -O- >"$PHB_TOC2_FILE" --\
         "$PHB_HMBS/?year=$PHB_YEARONLY&month=$PHB_MONTHONLY"\
         || die "$PHB_MSG35"
        # Skip to the <h1> line, then to the <h2> line; from the lines
        # that follow, print each link target. Keep only targets that
        # begin like a snapshot date, less any trailing slash.
        sed -rn\
         -e '\%<h1>[^<>]*</h1>%ba;d;:a;n;'\
         -e '\%<h2>[^<>]*</h2>%bb;ba;:b;n;'\
         -e "s%${PHB_PATM}%\\1%p;"\
         -e 'bb;' --\
         "$PHB_TOC2_FILE"\
         | sed -rn -- 's%/$%%;/^[[:digit:]]{8}T/p;'\
         >>"$TMPDIR/phb-html-parses/snapshots.list"
    done
    # (Observe that the value of PHB_INDEX, indexing a month, is no longer
    # needed. Thus, the next segment can reuse the variable PHB_INDEX for a
    # different purpose.)

    # Index by $PHB_INDEX the latest snapshot that precedes or equals
    # the regular release.
    local PHB_N_SNAPSHOTS
    declare -ir PHB_N_SNAPSHOTS=$(
        wc -l <"$TMPDIR/phb-html-parses/snapshots.list"
    )
    ((PHB_N_SNAPSHOTS > 0)) || {
        die "$(printf "$PHB_MSG40" "$SNAPSHOT_SERVICE")"
    }
    # (Observe that PHB_INDEX has already been locally defined.)
    PHB_INDEX=$PHB_N_SNAPSHOTS
    local PHB_SNAPSHOT1
    for PHB_SNAPSHOT1 in\
        $(tac -- "$TMPDIR/phb-html-parses/snapshots.list")
    do
        touch -d "$(convert_format_ss_to_d2 "$PHB_SNAPSHOT1")" --\
         "$TMPDIR/phb-internal/stamp.stamp"
        [ "$TMPDIR/phb-internal/stamp.stamp"\
         -nt "$TMPDIR/phb-release.stamp" ] || break
        # (See the earlier loop regarding the form of this decrement.)
        PHB_INDEX=$((PHB_INDEX - 1))
        if ((PHB_INDEX < 1)); then
            die "$(printf "$PHB_MSG50" "$SNAPSHOT_SERVICE")"
        fi
    done

    # Step $SNAPSHOT_MARGIN snapshots older.
    ((PHB_INDEX > SNAPSHOT_MARGIN)) || {
        die "$(printf "$PHB_MSG60" "$SNAPSHOT_SERVICE")"
    }
    PHB_INDEX=$((PHB_INDEX - SNAPSHOT_MARGIN))
    readonly PHB_INDEX

    # Index by $PHB_JNDEX the earliest snapshot whose backports release
    # succeeds the regular release.
    local PHB_JNDEX PHB_HAS_FOUND
    declare -i PHB_JNDEX=0 PHB_HAS_FOUND=0
    # (Observe that PHB_SNAPSHOT1 has already been locally defined.)
    for PHB_SNAPSHOT1 in $(<"$TMPDIR/phb-html-parses/snapshots.list"); do
        ((++PHB_JNDEX))
        if ((PHB_JNDEX >= PHB_INDEX)); then
            wget -nH -O- >"$TMPDIR/phb-internal/wget-out.txt" --\
             "$PHB_HMBS/$PHB_SNAPSHOT1/dists/$DIST_BACKPORTS/Release"\
             || die "$PHB_MSG35"
            stamp_per_release "$TMPDIR/phb-internal/stamp.stamp"\
             "$TMPDIR/phb-internal/wget-out.txt"
            if [\
                    "$TMPDIR/phb-internal/stamp.stamp"\
                -nt "$TMPDIR/phb-release.stamp"\
            ]; then
                PHB_HAS_FOUND=1
                break
            fi
        fi
    done
    ((PHB_HAS_FOUND)) || {
        die "$(printf "$PHB_MSG70" "$SNAPSHOT_SERVICE")"
    }
    ((PHB_JNDEX > PHB_INDEX)) || {
        die "$(printf "$PHB_MSG80")"
    }
    # Note that the last command guarantees that PHB_JNDEX can be reduced
    # by 1 without risk of its shrinking to less than PHB_INDEX.

    # Step one snapshot older.
    ((--PHB_JNDEX))
    readonly PHB_JNDEX

    # Set SNAPSHOT_SSDATE, SNAPSHOT_D2DATE, et al. The loop stops with
    # PHB_SNAPSHOT1 holding entry number $PHB_JNDEX of snapshots.list.
    local PHB_KNDEX=0
    PHB_HAS_FOUND=0 # reused
    for PHB_SNAPSHOT1 in $(<"$TMPDIR/phb-html-parses/snapshots.list"); do
        ((++PHB_KNDEX))
        if ((PHB_KNDEX >= PHB_JNDEX)); then
            PHB_HAS_FOUND=1
            break
        fi
    done
    ((PHB_HAS_FOUND)) || die "$(printf "$PHB_MSG90")"
    SNAPSHOT_SSDATE="$PHB_SNAPSHOT1"
    SNAPSHOT_D2DATE="$(convert_format_ss_to_d2 "$PHB_SNAPSHOT1")"
    readonly SNAPSHOT_SSDATE SNAPSHOT_D2DATE
    touch -d "$SNAPSHOT_D2DATE" -- "$TMPDIR/snapshot.stamp"
    if ((OPT_STAMP)); then
        touch -d "$SNAPSHOT_D2DATE" -- "$STAMPOPT_FILE"
    fi

    return 0

}
readonly -f execute_phase_B
true

