#!/bin/bash
# Copyright (c) 2010 Malte Schwarzkopf <malte.schwarzkopf@cl.cam.ac.uk>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
# ----
#
# Skywriting/Ciel cluster launch script.
#
# usage: See sw-launch-cluster -h

# defaults
#
# Every setting the script reads is initialised here so that a value
# inherited from the caller's environment (e.g. an exported WIPEJOURNAL=1)
# cannot silently change what gets launched or deleted.
KEY="sw-masterkey"      # private key file passed to ssh -i
SWUSER="root"           # remote login user
SWROOT="/opt/skywriting" # root of the remote installation (scripts/, logs/, journal/)
VERBOSE=0               # 1: pipe remote output through tee instead of daemonising
SKIPMASTER=0            # 1: do not (re)start the master on the first machine
LOCAL_CLUSTER=0         # 1: bump the worker port for every worker
CORE_PINNING=0          # 1: prefix remote commands with "taskset -c <index>"
WORKER_PORT=8001
MASTER_PORT=8000
# NOTE(review): LOG_OPERATOR is switched to ">>" by --al but is not referenced
# by any launch command in this script -- confirm whether it is still needed.
LOG_OPERATOR=">"
STORENAME="store"       # value exported to workers as REL_BLOCK_LOCATION
# NOTE(review): LOGNAME is also the POSIX login-name environment variable;
# assigning it here changes what child processes see -- confirm this is benign.
LOGNAME="worker"        # basename of the worker log file under $SWROOT/logs
SEPARATE_STORES=0       # 1 (with LOCAL_CLUSTER): per-worker store and log names
WIPEJOURNAL=0           # 1: rm -rf $SWROOT/journal/* on the master before launch
EXTRA_ARGS=""           # extra flags appended to run_master.sh / run_worker.sh
CORE_PIN_CMD=""         # filled in per instance when CORE_PINNING is 1
SOURCE=""               # machine list file, set by -f

# ---------------------------------------------
# option processing

# Print the help text to stdout.
sw_usage() {
  echo "Launches a Skywriting/Ciel cluster given a list of machines. The master is "
  echo "lauched on the first machine in the list unless --skip-master is specified."
  echo ""
  echo "usage: sw-launch-cluster [-f cluster-file|(-i|-k) key|-r swroot|-u swuser|-v]"
  echo "--al: appends to the log file instead of overwriting it (default: off)."
  echo "      Ignored if -v is set."
  echo "--core-pin: pins worker processes to processors. N.B.: this assumes you actually "
  echo "            have at least as many processors as workers!"
  echo "-f: the file listing the machines in the cluster, one per line."
  echo "    If '--' is passed, STDIN is assumed."
  echo "-i|-k: the private key to use for authentication to cluster machines"
  echo "       (defaults to 'sw-masterkey')"
  echo "--lc|--localcluster: runs a local cluster, i.e. increments worker port numbers"
  echo "--mp: allows specification of a master port (default: 8000)"
  echo "-r|--swroot: the root directory of the remote Skywriting installation"
  echo "             (defaults to '/opt/skywriting')"
  echo "--skip-master: do not restart the master (ignore the first line of input)"
  echo "-u|--swuser: the user name of the Skywriting user on the cluster"
  echo "             (defaults to 'root')"
  echo "-v: verbose mode (don't surpress output from remote machines)"
  echo "-w|--wipejournal: wipe the job journal on the master before launching it"
  echo "--wp: allows specification of a worker port (default: 8001)"
}

# Abort unless an option that takes a value is actually followed by one.
#   $1 - the option as typed (used in the error message)
#   $2 - number of arguments still to be parsed, including the option itself
# Without this check "shift 2" fails when only the option is left, nothing is
# consumed, and the parsing loop never terminates.
sw_require_value() {
  if [ "$2" -lt 2 ]; then
    echo "sw-launch-cluster: option '$1' requires an argument" >&2
    exit 1
  fi
}

# Parse the given command-line arguments into the script's global settings
# (KEY, SOURCE, SWROOT, SWUSER, the *_PORT values, the 0/1 flags, EXTRA_ARGS).
# -h and any unrecognised option print the usage text and exit with status 0.
sw_parse_options() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --al)
        LOG_OPERATOR=">>"
        shift 1
        ;;

      --core-pin)
        CORE_PINNING=1
        shift 1
        ;;

      -i|-k)
        sw_require_value "$1" "$#"
        KEY=$2
        shift 2
        ;;

      -f)
        sw_require_value "$1" "$#"
        if [[ $2 = '--' ]]; then
          # STDIN input
          SOURCE=/dev/stdin
        else
          # Cluster definition file
          SOURCE=$2
        fi
        shift 2
        ;;

      --lc|--localcluster)
        LOCAL_CLUSTER=1
        shift 1
        ;;

      --mp)
        sw_require_value "$1" "$#"
        MASTER_PORT=$2
        shift 2
        ;;

      -r|--swroot)
        sw_require_value "$1" "$#"
        SWROOT=$2
        shift 2
        ;;

      -u|--swuser)
        sw_require_value "$1" "$#"
        SWUSER=$2
        shift 2
        ;;

      -v)
        VERBOSE=1
        shift 1
        ;;

      -w|--wipejournal)
        WIPEJOURNAL=1
        shift 1
        ;;

      --wp)
        sw_require_value "$1" "$#"
        WORKER_PORT=$2
        shift 2
        ;;

      --skip-master)
        SKIPMASTER=1
        shift 1
        ;;

      --sep-stores)
        SEPARATE_STORES=1
        shift 1
        ;;

      --stopwatch)
        EXTRA_ARGS="$EXTRA_ARGS -S"
        shift 1
        ;;

      --no-reuse)
        EXTRA_ARGS="$EXTRA_ARGS -U"
        shift 1
        ;;

      --tcp)
        # Append to EXTRA_ARGS; the previous code expanded a misspelled
        # variable here and thereby dropped all earlier extra arguments.
        EXTRA_ARGS="$EXTRA_ARGS -P 9000"
        shift 1
        ;;

      -h|*)
        sw_usage
        exit 0
        ;;
    esac
  done
}

sw_parse_options "$@"

# ---------------------------------------------
# main script

# Launch one instance per line of the machine list: the first machine becomes
# the master (unless SKIPMASTER is 1), every following one a worker pointing at
# that master. Failures of individual ssh invocations are not checked; the
# launch is best-effort and the script exits 0 once the list has been walked.

if [[ -z "${SOURCE:-}" ]]; then
    echo "sw-launch-cluster: no machine list given; use -f <file> (or -f -- for STDIN)" >&2
    exit 1
fi

# The list is read through fd 3 so that commands run inside the loop do not
# compete with `read` for the script's standard input.
exec 3<"$SOURCE" || {
    echo "sw-launch-cluster: cannot read machine list '$SOURCE'" >&2
    exit 1
}

# ssh options shared by every remote invocation.
SSH_OPTS=(-o StrictHostKeyChecking=no -i "$KEY")

I=0
# `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline.
while read -r -u 3 myLine || [[ -n "$myLine" ]]; do
    # Blank lines carry no host name; skip them without consuming an index.
    [[ -n "$myLine" ]] || continue

    echo -n "Setting up instance $I "
    if [[ $CORE_PINNING -eq 1 ]]; then
        CORE_PIN_CMD="taskset -c $I"
    else
        CORE_PIN_CMD=""
    fi

    if [[ $I -eq 0 ]]; then
        echo "... as a master."
        if [[ $SKIPMASTER -eq 0 ]]; then
            if [[ ${WIPEJOURNAL:-0} -eq 1 ]]; then
                # -n: this ssh runs in the foreground, so keep it away from
                # stdin -- with "-f --" the machine list may arrive there.
                # ${SWROOT:?} aborts rather than expanding to "/journal/*".
                ssh -n "${SSH_OPTS[@]}" "$SWUSER@$myLine" \
                    "rm -rf ${SWROOT:?SWROOT must not be empty when wiping the journal}/journal/*"
            fi
            if [[ $VERBOSE -eq 1 ]]; then
                ssh -f "${SSH_OPTS[@]}" "$SWUSER@$myLine" \
                    "MASTER_PORT=$MASTER_PORT nohup $CORE_PIN_CMD $SWROOT/scripts/run_master.sh $EXTRA_ARGS 2>&1 | tee $SWROOT/logs/master.log"
            else
                ssh -f "${SSH_OPTS[@]}" "$SWUSER@$myLine" \
                    "MASTER_PORT=$MASTER_PORT $CORE_PIN_CMD $SWROOT/scripts/run_master.sh $EXTRA_ARGS -D -o ${SWROOT}/logs/master.log"
            fi
        fi
        # Remembered even when the master is skipped: workers still need it.
        MASTER=$myLine
    else
        if [[ $LOCAL_CLUSTER -eq 1 ]]; then
            # The port is bumped before it is used, so the first worker of a
            # local cluster listens on the configured worker port plus one.
            WORKER_PORT=$((WORKER_PORT + 1))
            if [[ $SEPARATE_STORES -eq 1 ]]; then
                STORENAME="store/store$I"
                LOGNAME="worker$I"
            fi
        else
            STORENAME="store"
            LOGNAME="worker"
        fi

        echo "... as a worker for $MASTER, port $WORKER_PORT"
        if [[ $VERBOSE -eq 1 ]]; then
            ssh -f "${SSH_OPTS[@]}" "$SWUSER@$myLine" \
                "WORKER_PORT=${WORKER_PORT} MASTER_HOST='http://$MASTER:${MASTER_PORT}' REL_BLOCK_LOCATION=$STORENAME nohup $CORE_PIN_CMD $SWROOT/scripts/run_worker.sh $EXTRA_ARGS 2>&1 | tee $SWROOT/logs/$LOGNAME.log"
        else
            ssh -f "${SSH_OPTS[@]}" "$SWUSER@$myLine" \
                "WORKER_PORT=${WORKER_PORT} MASTER_HOST='http://$MASTER:${MASTER_PORT}' REL_BLOCK_LOCATION=$STORENAME $CORE_PIN_CMD $SWROOT/scripts/run_worker.sh $EXTRA_ARGS -D -o ${SWROOT}/logs/$LOGNAME.log"
        fi
    fi
    I=$((I + 1))
done
exec 3<&-

exit 0
