#!/usr/local/sbin/charm-env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Stop at the first failing command (errexit) and echo each command as it is
# executed (xtrace).
set -o errexit -o xtrace

# Guard clause: only continue when the charm reports the
# 'apache-bigtop-resourcemanager.ready' state. Otherwise flag the action as
# failed and leave; the bare `exit` returns action-fail's own status.
charms.reactive is_state 'apache-bigtop-resourcemanager.ready' || {
    action-fail 'ResourceManager not yet ready'
    exit
}

# Action parameters: HDFS input/output directories and the teragen data size.
IN_DIR=$(action-get indir)
OUT_DIR=$(action-get outdir)
SIZE=$(action-get size)

# Action parameters that tune the terasort job itself.
MAPS=$(action-get maps)
REDUCES=$(action-get reduces)
NUMTASKS=$(action-get numtasks)
COMPRESSION=$(action-get compression)

#######################################
# Print the "-D key=value" options handed to the terasort job.
# Arguments:
#   $1 - value for mapreduce.job.maps
#   $2 - value for mapreduce.job.reduces
#   $3 - value for mapreduce.job.jvm.numtasks
#   $4 - map-output compression mode:
#          'Disable'               force compression off
#          'LocalDefault' or empty add nothing (keep the cluster's setting)
#          anything else           codec name prefix, expanded to
#                                  org.apache.hadoop.io.compress.<name>Codec
# Outputs:
#   The option string on stdout; every option is preceded by one space.
# Returns:
#   0
#######################################
build_terasort_options() {
  local opts=''
  opts+=" -D mapreduce.job.maps=$1"
  opts+=" -D mapreduce.job.reduces=$2"
  opts+=" -D mapreduce.job.jvm.numtasks=$3"
  # `case` on a quoted word copes with empty or multi-word values, which made
  # the former unquoted `[ $COMPRESSION == ... ]` tests error out.
  case "$4" in
    Disable)
      opts+=" -D mapreduce.map.output.compress=false"
      ;;
    '' | LocalDefault)
      # Nothing to add: the cluster configuration decides.
      ;;
    *)
      # The codec key must be mapreduce.map.output.compress.codec; the
      # previously used mapred.map.output.compress.codec is not a property
      # Hadoop knows, so the requested codec was never applied.
      opts+=" -D mapreduce.map.output.compress=true"
      opts+=" -D mapreduce.map.output.compress.codec=org.apache.hadoop.io.compress.$4Codec"
      ;;
  esac
  printf '%s' "${opts}"
}

# construct options
OPTIONS=$(build_terasort_options "${MAPS}" "${REDUCES}" "${NUMTASKS}" "${COMPRESSION}")

# Results are kept in a fixed directory; the log of this run is named after
# the current time in epoch seconds.
RESULT_DIR=/opt/terasort-results
RUN=$(date +%s)
RESULT_LOG="${RESULT_DIR}/${RUN}.log"

# Make sure the directory exists and that ubuntu owns it and everything in it.
mkdir -p "${RESULT_DIR}"
chown -R ubuntu:ubuntu "${RESULT_DIR}"

#######################################
# Delete the benchmark input and output directories from HDFS.
# Globals:
#   IN_DIR, OUT_DIR (read)
# Arguments:
#   None
# Returns:
#   The exit status of su.
#######################################
benchmark_cleanup() {
  # The removal is executed as ubuntu because that user owns the hdfs space.
  local rm_cmd="hadoop fs -rm -f -r -skipTrash ${IN_DIR} ${OUT_DIR}"
  su - ubuntu -c "${rm_cmd}"
}

# Remove the input and output directories (leftovers from an earlier run)
# before any new data is generated.
benchmark_cleanup

# START/STOP (epoch seconds) bracket both the data generation and the sort,
# so the duration derived from them covers teragen plus terasort.
benchmark-start
START=`date +%s`
# NB: The heredoc delimiter below is unquoted, so this outer shell expands
# non-escaped vars (e.g., ${IN_DIR}) before the text is handed to the shell
# that su starts for ubuntu. Escaped vars (e.g., \${HADOOP_MAPRED_HOME}) are
# passed through literally and expanded by that inner shell, after it has
# sourced /etc/default/hadoop.
# NOTE(review): an earlier version of this comment said the escaped vars come
# from /etc/environment -- confirm where HADOOP_MAPRED_HOME is really defined.
# NOTE(review): each hadoop command is piped into tee, so the status of the
# pipeline is tee's; a failing job does not by itself stop this action --
# confirm that this is intended.
su ubuntu << EOF
. /etc/default/hadoop
echo 'generating data'
hadoop jar \${HADOOP_MAPRED_HOME}/hadoop-mapreduce-examples-*.jar teragen ${SIZE} ${IN_DIR} 2>&1 | tee -a ${RESULT_LOG}
echo 'sorting data'
hadoop jar \${HADOOP_MAPRED_HOME}/hadoop-mapreduce-examples-*.jar terasort ${OPTIONS} ${IN_DIR} ${OUT_DIR} 2>&1 | tee -a ${RESULT_LOG}
EOF
STOP=`date +%s`
benchmark-finish

# Set action and benchmark output: parseBenchmark.py receives the captured
# job log on stdin (presumably it publishes the parsed counters as action
# results -- see that script).
"${CHARM_DIR}/actions/parseBenchmark.py" < "${RESULT_LOG}"

# Use shell arithmetic rather than expr: expr exits with status 1 whenever the
# result is 0, which under `set -e` would abort the action if START and STOP
# fall within the same second.
DURATION=$(( STOP - START ))
benchmark-composite "${DURATION}" 'secs' 'asc'
action-set outcome="success"

# clean out benchmark dir
benchmark_cleanup
