#!/usr/bin/env bash
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

# SYSDS_DISTRIBUTED selects the launcher: 0 (the default when the variable is
# unset or empty) starts a local java process, any other value goes through
# spark-submit.
: "${SYSDS_DISTRIBUTED:=0}"

# SYSDS_QUIET defaults to 0; only the value 0 enables the setup output of this
# script.
: "${SYSDS_QUIET:=0}"

# SYSDS_EXEC_MODE, when not provided, follows the launcher choice above:
# singlenode for local runs, hybrid otherwise.
if [[ -z "$SYSDS_EXEC_MODE" ]]; then
  if [[ "$SYSDS_DISTRIBUTED" == 0 ]]; then
    SYSDS_EXEC_MODE=singlenode
  else
    SYSDS_EXEC_MODE=hybrid
  fi
fi

# Echo toggle: print a message unless quiet mode is active.
# Globals:   SYSDS_QUIET (read) - output is produced only when it equals 0
# Arguments: $1 - message to print
# Outputs:   the message followed by a newline on stdout
print_out()
{
  # Quoted test: an unset or multi-word SYSDS_QUIET simply means "no output"
  # instead of producing a test syntax error.
  if [[ "$SYSDS_QUIET" == 0 ]]; then
    # printf keeps messages such as "-n" or "-e" intact; echo would treat
    # them as options and print nothing.
    printf '%s\n' "$1"
  fi
}

# Use the current working directory as SYSTEMDS_ROOT when the variable is
# unset or empty, and report that choice through print_out.
[[ -n "$SYSTEMDS_ROOT" ]] || {
  SYSTEMDS_ROOT=$(pwd)
  print_out "SYSTEMDS_ROOT not set defaulting to current dir $SYSTEMDS_ROOT"
}

# Directories probed by ordered_find, in priority order. They are kept in an
# array so that a SYSTEMDS_ROOT containing whitespace stays a single entry.
DIR_SEARCH_ORDER_LIST=(
  "$SYSTEMDS_ROOT/target"
  "."
  "$SYSTEMDS_ROOT"
  "$SYSTEMDS_ROOT/conf"
  "$SYSTEMDS_ROOT/lib"
  "$SYSTEMDS_ROOT/src"
)
# Space-joined form of the list above, kept under its original name.
DIR_SEARCH_ORDER="${DIR_SEARCH_ORDER_LIST[*]}"

# Find the first file whose name matches a pattern (case-insensitively),
# walking the search directories in order.
# Globals:   DIR_SEARCH_ORDER_LIST (read), SYSTEMDS_ROOT (read)
# Arguments: $1 - file name pattern handed to find -iname
# Outputs:   the path of the first match on stdout, or an empty line
# Returns:   0 always
ordered_find() {
  local pattern=$1
  local dir
  local hit=""
  local -a depth_opts
  for dir in "${DIR_SEARCH_ORDER_LIST[@]}"; do
    depth_opts=()
    # The root itself and the current directory are only searched at the top
    # level; the dedicated sub-directories are searched recursively.
    if [[ "$dir" == "$SYSTEMDS_ROOT" || "$dir" == "." ]]; then
      depth_opts=(-maxdepth 1)
    fi
    # Missing or unreadable directories are expected, so find's complaints
    # are dropped for every directory alike.
    hit=$(find "$dir" "${depth_opts[@]}" -iname "$pattern" -print -quit 2> /dev/null)
    if [[ -n "$hit" ]]; then
      break
    fi
  done
  printf '%s\n' "$hit"
}

# JVM options for local (standalone) runs. A non-empty value from the
# environment wins; otherwise the defaults below are used.
if [[ -z "$SYSTEMDS_STANDALONE_OPTS" ]]; then
  # specify parameters to java when running locally here
  SYSTEMDS_STANDALONE_OPTS="-Xmx4g -Xms4g -Xmn400m "
else
  print_out "Overriding SYSTEMDS_STANDALONE_OPTS with env var: $SYSTEMDS_STANDALONE_OPTS"
fi

# JVM agent options that are appended when remote debugging is requested.
# A non-empty value from the environment wins here as well.
if [[ -z "$SYSTEMDS_REMOTE_DEBUGGING" ]]; then
  SYSTEMDS_REMOTE_DEBUGGING=" -agentlib:jdwp=transport=dt_socket,suspend=y,address=8787,server=y "
else
  print_out "Overriding SYSTEMDS_REMOTE_DEBUGGING with env var: $SYSTEMDS_REMOTE_DEBUGGING"
fi

# Resolve the log4j configuration file unless LOG4JPROP was already provided.
# The two obvious locations are probed first; only when neither exists is the
# wildcard search through ordered_find used.
if [[ -z "$LOG4JPROP" ]]; then
  LOG4JPROP="$SYSTEMDS_ROOT/conf/log4j.properties"
  if [[ ! -f "$LOG4JPROP" ]]; then
    LOG4JPROP="$SYSTEMDS_ROOT/log4j.properties"
    [[ -f "$LOG4JPROP" ]] || LOG4JPROP=$(ordered_find "log4j*properties")
  fi
fi

# Only an existing file is turned into a JVM option; otherwise LOG4JPROPFULL
# is left untouched so that the JVM option is simply omitted.
[[ ! -f "${LOG4JPROP}" ]] || LOG4JPROPFULL="-Dlog4j.configuration=file:$LOG4JPROP"

# Options handed to spark-submit. A non-empty value from the environment wins;
# otherwise the defaults below are assembled.
# The string is later expanded through eval, which is why the embedded double
# quotes are escaped: they keep multi-word values together at launch time.
if [[ -n "${SYSTEMDS_DISTRIBUTED_OPTS}" ]]; then
  print_out "Overriding SYSTEMDS_DISTRIBUTED_OPTS with env var $SYSTEMDS_DISTRIBUTED_OPTS"
else
  # specify parameters to pass to spark-submit when running on spark here
  SYSTEMDS_DISTRIBUTED_OPTS="--master yarn"
  SYSTEMDS_DISTRIBUTED_OPTS+=" --deploy-mode client"
  SYSTEMDS_DISTRIBUTED_OPTS+=" --driver-memory 100g"
  if [[ -f "$LOG4JPROP" ]]; then
    SYSTEMDS_DISTRIBUTED_OPTS+=" --conf spark.driver.extraJavaOptions=\"-Xms100g -Xmn10g -Dlog4j.configuration=file:$LOG4JPROP\""
    SYSTEMDS_DISTRIBUTED_OPTS+=" --conf spark.executor.extraJavaOptions=\"-Dlog4j.configuration=file:$LOG4JPROP\""
    SYSTEMDS_DISTRIBUTED_OPTS+=" --files \"$LOG4JPROP\""
  else
    # Without a log4j file every log4j-related option is left out. An empty
    # "--files" value would otherwise consume the option that follows it.
    SYSTEMDS_DISTRIBUTED_OPTS+=" --conf spark.driver.extraJavaOptions=\"-Xms100g -Xmn10g\""
  fi
  SYSTEMDS_DISTRIBUTED_OPTS+=" --conf spark.executor.heartbeatInterval=100s"
  SYSTEMDS_DISTRIBUTED_OPTS+=" --conf spark.network.timeout=512s"
  SYSTEMDS_DISTRIBUTED_OPTS+=" --num-executors 4"
  SYSTEMDS_DISTRIBUTED_OPTS+=" --executor-memory 64g"
  # The trailing blank keeps later appended options separated.
  SYSTEMDS_DISTRIBUTED_OPTS+=" --executor-cores 16 "
fi


# Print the usage text of this run script.
# Arguments: none
# Outputs:   the usage text on stdout; $0 is expanded to the script name
printUsage() {
cat << EOF

Usage: $0 [-r] [SystemDS.jar] [-f] <dml-filename> [arguments] [-help]

    SystemDS.jar : Specify a custom SystemDS.jar file (this will be prepended
                   to the classpath or fed to spark-submit)
    -r           : Spawn a debug server for remote debugging (standalone and
                   spark driver only atm). Default port is 8787 - change within
                   this script if necessary. See SystemDS documentation on how
                   to attach a remote debugger.
    -f           : Optional prefix to the dml-filename for consistency with
                   previous behavior.
    dml-filename : The script file to run. This is mandatory unless running
                   as a federated worker or federated monitoring (see below).
    arguments    : The arguments specified after the DML script are passed to
                   SystemDS. Specify parameters that need to go to
                   java/spark-submit by editing this run script.
    -help        : Print this usage message and SystemDS parameter info

Worker Usage: $0 [-r] WORKER [SystemDS.jar] <portnumber> [arguments] [-help]

    port         : The port to open for the federated worker.

Federated Monitoring Usage: $0 [-r] FEDMONITORING [SystemDS.jar] <portnumber> [arguments] [-help]

    port         : The port to open for the federated monitoring tool.

Set custom launch configuration by setting/editing SYSTEMDS_STANDALONE_OPTS
and/or SYSTEMDS_DISTRIBUTED_OPTS.

Set the environment variable SYSDS_DISTRIBUTED=1 to run spark-submit instead of
local java. Set SYSDS_QUIET=1 to omit extra information printed by this run
script.

EOF
}

# Stop right away when the first positional argument is missing or empty:
# print a short hint, a blank line and the usage text.
if [[ -z "$1" ]]; then
  printf '%s\n\n' "Wrong Usage. Add -help for additional parameters."
  printUsage
  # same status the shell reports for "exit -1"
  exit 255
fi

# Parse the options that belong to this run script, not the ones passed on to
# SystemDS.
# getopts is handed one single word: the first two positional arguments glued
# together ("$1$2"). "-r script.dml" is therefore seen as "-rscript.dml" and
# "-f script.dml" as "-fscript.dml", which is why r and f are declared as
# options that take an argument. Arguments beyond the second are never looked
# at here. If more run-script options are needed, adjust the getopts line.
# PRINT_SYSDS_HELP is set to 1 when -h (or -help) is given; a later part of
# this script prints the SystemDS help when the flag is 1.
# Usage errors terminate the script with status 1.
PRINT_SYSDS_HELP=0
while getopts ":hr:f:" options "$1$2"; do
  case "$options" in
    h)
      echo "Help requested. Will exit after extended usage message!"
      printUsage
      PRINT_SYSDS_HELP=1
      break
      ;;
    \?)
      echo "Unknown parameter -$OPTARG"
      printUsage
      # non-zero so that callers can detect the usage error
      exit 1
      ;;
    f)
      # silently accept -f (this variant is triggered if there's no
      # jar file or WORKER as first parameter); the -f word itself is removed
      # from the positional arguments further down in this script
      if echo "$OPTARG" | grep -qi "dml"; then
        break
      else
        print_out "No DML Script found after -f option."
      fi
      ;;
    r)
      print_out "Spawning server for remote debugging"
      if [[ "$SYSDS_DISTRIBUTED" == 0 ]]; then
        SYSTEMDS_STANDALONE_OPTS=${SYSTEMDS_STANDALONE_OPTS}${SYSTEMDS_REMOTE_DEBUGGING}
      else
        SYSTEMDS_DISTRIBUTED_OPTS=${SYSTEMDS_DISTRIBUTED_OPTS}${SYSTEMDS_REMOTE_DEBUGGING}
      fi
      shift # remove -r from positional arguments
      ;;
    *)
      # Reached, for instance, when getopts reports a missing option argument.
      print_out "Error: Unexpected error while processing options;"
      printUsage
      exit 1
      ;;
  esac
done

# Peel the launcher's own positional arguments off the front of an argument
# list. Accepted shapes:
#   [X.jar] [-f] <dml-filename> [arguments]
#   WORKER [X.jar] <portnumber> [arguments]
#   FEDMONITORING [X.jar] <portnumber> [arguments]
# The keywords and "-f" are matched as whole words and a jar is recognised by
# its ".jar" suffix, so a script called "my-file.dml" or a path that merely
# contains "jar" is treated as the DML script.
# Globals written: SYSTEMDS_JAR_FILE (only when a jar is given), SCRIPT_FILE
#                  (script modes), WORKER or FEDMONITORING plus PORT (federated
#                  modes), LAUNCH_REMAINING_ARGS (everything left over)
# Arguments: the positional arguments of the run script
# Exits:     with status 1 when a federated mode gets a non-numeric port
parse_launch_args() {
  case "$1" in
    WORKER | FEDMONITORING)
      if [[ "$1" == "WORKER" ]]; then
        WORKER=1
      else
        FEDMONITORING=1
      fi
      shift
      if [[ "$1" == *.jar ]]; then
        SYSTEMDS_JAR_FILE=$1
        shift
      fi
      PORT=$1
      if ! [[ "$PORT" =~ ^[0-9]+$ ]]; then
        echo "error: Port is not a number"
        printUsage
        # launching with an unusable port cannot succeed, so stop here
        exit 1
      fi
      shift
      ;;
    *)
      if [[ "$1" == *.jar ]]; then
        SYSTEMDS_JAR_FILE=$1
        shift
      fi
      # handle optional '-f' before DML file (for consistency)
      if [[ "$1" == "-f" ]]; then
        shift
      fi
      SCRIPT_FILE=$1
      shift
      ;;
  esac
  LAUNCH_REMAINING_ARGS=("$@")
}

# Afterwards $@ holds only the arguments meant for the DML script / SystemDS.
parse_launch_args "$@"
set -- "${LAUNCH_REMAINING_ARGS[@]}"

# Federated mode flags default to 0 when the argument parsing did not set them.
: "${WORKER:=0}"
: "${FEDMONITORING:=0}"

# Find a SystemDS jar file to run when none was given on the command line.
# Every step tests for an EMPTY value rather than an unset variable, so a
# search that comes back with nothing falls through to the next step.
if [[ -z "$SYSTEMDS_JAR_FILE" && -n "$SYSTEMDS_ROOT" ]]; then
  if [[ -d "$SYSTEMDS_ROOT/target" && -d "$SYSTEMDS_ROOT/.git" ]]; then
    # Current path is most likely a build directory of systemds
    SYSTEMDS_JAR_FILE=$(find "$SYSTEMDS_ROOT/target" -maxdepth 1 -iname "systemds-?.?.?-SNAPSHOT.jar" -print -quit)
  elif [[ -d "$SYSTEMDS_ROOT/lib" ]]; then
    # Most likely a release directory.
    SYSTEMDS_JAR_FILE=$(find "$SYSTEMDS_ROOT" -maxdepth 1 -iname "systemds-?.?.?-SNAPSHOT.jar" -print -quit)
  fi
fi

# If no jar file is found yet, search the ordered directories, trying the
# plain name first, then a versioned release jar, then a snapshot jar.
if [[ -z "$SYSTEMDS_JAR_FILE" ]]; then
  for jar_pattern in "systemds.jar" "systemds-?.?.?.jar" "systemds-?.?.?-SNAPSHOT.jar"; do
    SYSTEMDS_JAR_FILE=$(ordered_find "$jar_pattern")
    if [[ -n "$SYSTEMDS_JAR_FILE" ]]; then
      break
    fi
  done
  unset jar_pattern

  # Nothing can be launched without a jar.
  if [[ -z "$SYSTEMDS_JAR_FILE" ]]; then
    echo "ERROR: Unable to find SystemDS jar file to launch" >&2
    exit 1
  fi
fi

# Print the default SystemDS configuration file, or an empty line if none is
# found. The two obvious locations are probed first, then ordered_find is used
# for SystemDS-config-defaults.xml and finally for SystemDS-config.xml.
# Globals:   SYSTEMDS_ROOT (read)
# Outputs:   path of the configuration file on stdout (may be empty)
find_default_config_file() {
  local candidate
  for candidate in \
    "$SYSTEMDS_ROOT/conf/SystemDS-config-defaults.xml" \
    "$SYSTEMDS_ROOT/SystemDS-config-defaults.xml"; do
    if [[ -f "$candidate" ]]; then
      # report the path that was actually checked
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  candidate=$(ordered_find "SystemDS-config-defaults.xml")
  if [[ -z "$candidate" ]]; then # Second search if still not found.
    candidate=$(ordered_find "SystemDS-config.xml")
  fi
  printf '%s\n' "$candidate"
}

# Determine CONFIG_FILE, which ends up either empty or as "-config <file>".
# Precedence: a -config argument on the command line, then the default
# locations (when CONFIG_FILE is not set in the environment), then the
# CONFIG_FILE environment variable.
if [[ "$*" == *-config* ]]; then
  # override config file from env var if given as parameter to SystemDS
  # Copy the arguments as they are; no re-splitting on whitespace.
  myArray=("$@")
  for INDEX in "${!myArray[@]}"; do
    [[ "${myArray[INDEX]}" == *-config* ]] || continue

    if [[ -f "${myArray[INDEX+1]}" ]]; then
      CONFIG_FILE="${myArray[INDEX+1]}"
    else
      echo "Warning! Passed config file ${myArray[INDEX+1]} does not exist."
    fi
    # remove -config
    unset 'myArray[INDEX]'

    # remove -config param if not starting with -
    if [[ "${myArray[INDEX+1]:0:1}" != "-" ]]; then
      unset 'myArray[INDEX+1]'
    fi
    # setting the script arguments without the passed -config for further processing
    set -- "${myArray[@]}"
    break
  done

  if [[ -f "$CONFIG_FILE" ]]; then
    CONFIG_FILE="-config $CONFIG_FILE"
  else
    CONFIG_FILE=""
  fi
elif [[ -z "$CONFIG_FILE" ]]; then
  # default search for config file
  CONFIG_FILE=$(find_default_config_file)
  if [[ -n "$CONFIG_FILE" ]]; then
    CONFIG_FILE="-config $CONFIG_FILE"
  fi
else
  # CONFIG_FILE was set by env var. Unset if that setting is wrong
  if [[ -f "${CONFIG_FILE}" ]]; then
    CONFIG_FILE="-config $CONFIG_FILE"
  else
    CONFIG_FILE=""
  fi
fi

# An -exec argument meant for SystemDS (e.g. -exec singlenode) also overrides
# SYSDS_EXEC_MODE: the word following the first argument that contains "-exec"
# becomes the new mode. The arguments are tokenised on whitespace first.
read -r -d '' -a myArray < <( echo "$@" )
for (( INDEX = 0; INDEX < ${#myArray[@]}; INDEX++ )); do
  if [[ "${myArray[INDEX]}" == *-exec* ]]; then
    SYSDS_EXEC_MODE="${myArray[INDEX+1]}"
    break
  fi
done

# Without HADOOP_HOME from the environment, take the last entry named
# "hadoop" (any case) that find reports below SYSTEMDS_ROOT.
if [[ -z "$HADOOP_HOME" ]]; then
  HADOOP_HOME="$(find "$SYSTEMDS_ROOT" -iname hadoop | tail -n 1)"
fi

# Pick path and directory separators according to the operating system.
case "$OSTYPE" in
  win32 | msys | cygwin)
    PATH_SEP=\;
    DIR_SEP=\\
    ;;
  *)
    # default separators, unix style
    DIR_SEP=/
    PATH_SEP=:
    ;;
esac

# Put hadoop's bin directory in front of PATH and LD_LIBRARY_PATH and add the
# native library directory of the build at the end of PATH.
# NOTE(review): neither variable is exported here; LD_LIBRARY_PATH reaches
# child processes only if it was already exported - confirm that is intended.
NATIVE_LIBS="${SYSTEMDS_ROOT}${DIR_SEP}target${DIR_SEP}classes${DIR_SEP}lib"
PATH="${HADOOP_HOME}${DIR_SEP}bin${PATH_SEP}${PATH}${PATH_SEP}${NATIVE_LIBS}"
LD_LIBRARY_PATH="${HADOOP_HOME}${DIR_SEP}bin${PATH_SEP}${LD_LIBRARY_PATH}"

# When help was requested during option parsing, append SystemDS' own help
# output and stop with status 1.
if [[ "$PRINT_SYSDS_HELP" == 1 ]]; then
  echo "----------------------------------------------------------------------"
  echo "Further help on SystemDS arguments:"
  # Quoted so that a jar path containing whitespace stays one argument.
  java -jar "$SYSTEMDS_JAR_FILE" org.apache.sysds.api.DMLScript -help
  exit 1
fi

# Summary of the resolved settings, shown unless quiet mode is active.
if [[ "$SYSDS_QUIET" == 0 ]]; then
  print_out "###############################################################################"
  print_out "#  SYSTEMDS_ROOT= $SYSTEMDS_ROOT"
  print_out "#  SYSTEMDS_JAR_FILE= $SYSTEMDS_JAR_FILE"
  print_out "#  SYSDS_EXEC_MODE= $SYSDS_EXEC_MODE"
  print_out "#  CONFIG_FILE= $CONFIG_FILE"
  print_out "#  LOG4JPROP= $LOG4JPROPFULL"
  print_out "#  HADOOP_HOME= $HADOOP_HOME"
  print_out "#"
fi

# Build the command to run. The command is collected as a list of components
# and then joined into the single string CMD, each component preceded by
# three blanks. The mode decides the launcher and the mode-specific options:
#   WORKER == 1            -> java ... -w <port>
#   FEDMONITORING == 1     -> java ... -fedMonitoring <port>
#   SYSDS_DISTRIBUTED == 0 -> java ... -f <script> -exec <mode>
#   otherwise              -> spark-submit ... -f <script> -exec <mode>
if [[ "$WORKER" == 1 ]]; then
  print_out "#  starting Federated worker on port $PORT"
  cmd_parts=(
    "java $SYSTEMDS_STANDALONE_OPTS"
    "$LOG4JPROPFULL"
    "-jar $SYSTEMDS_JAR_FILE"
    "-w $PORT"
  )
elif [[ "$FEDMONITORING" == 1 ]]; then
  print_out "#  starting Federated backend monitoring on port $PORT"
  cmd_parts=(
    "java $SYSTEMDS_STANDALONE_OPTS"
    "$LOG4JPROPFULL"
    "-jar $SYSTEMDS_JAR_FILE"
    "-fedMonitoring $PORT"
  )
elif [[ "$SYSDS_DISTRIBUTED" == 0 ]]; then
  print_out "#  Running script $SCRIPT_FILE locally with opts: $*"
  cmd_parts=(
    "java $SYSTEMDS_STANDALONE_OPTS"
    "$LOG4JPROPFULL"
    "-jar $SYSTEMDS_JAR_FILE"
    "-f $SCRIPT_FILE"
    "-exec $SYSDS_EXEC_MODE"
  )
else
  print_out "#  Running script $SCRIPT_FILE distributed with opts: $*"
  cmd_parts=(
    "spark-submit $SYSTEMDS_DISTRIBUTED_OPTS"
    "$SYSTEMDS_JAR_FILE"
    "-f $SCRIPT_FILE"
    "-exec $SYSDS_EXEC_MODE"
  )
fi
# Every variant ends with the config option and the remaining arguments.
cmd_parts+=("$CONFIG_FILE" "$*")
printf -v CMD '   %s' "${cmd_parts[@]}"

if [[ "$SYSDS_QUIET" == 0 ]]; then
  print_out "#  Executing command: $CMD"
  print_out "###############################################################################"
fi

# run
# CMD is evaluated as shell code: quotes embedded in the option strings take
# effect here, and so does any shell syntax contained in the arguments.
eval "$CMD"
