#!/bin/sh
#
# This script starts and stops the Sidekiq daemon
# This script belongs in /engineyard/bin/sidekiq
#

# Put the standard system locations ahead of whatever PATH was inherited.
PATH="/bin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin:${PATH}"
# Remember the starting directory; exit_cleanly changes back to it.
CURDIR=$(pwd)

# Print the command-line synopsis on stdout and terminate the script with
# exit status 1.
usage() {
  # printf rather than `echo -e`: under a strict POSIX /bin/sh, `echo -e`
  # prints a literal "-e" instead of interpreting the "\n" escapes.
  printf '%s\n' \
    "Usage: $0 <appname> {start|stop|quit} <environment> <conf_file>" \
    "" \
    "stop)     is a synonym for quit" \
    "quit)     issues -INT to request the worker to stop" \
    "" \
    "See http://mperham.github.com/sidekiq/ for more details"
  exit 1
}

# Remove the monit lock file named by $LOCK_FILE if it exists, logging the
# pid that was stored in it. Does nothing when the file is absent or when
# LOCK_FILE is empty.
# Globals: LOCK_FILE (read)
rm_lockfile() {
  # Quoted on purpose: an unquoted empty LOCK_FILE turns the test into
  # `[ -e ]`, which is true, and `cat` would then block reading stdin.
  if [ -e "$LOCK_FILE" ]; then
    logger -t "monit-sidekiq[$$]" "removing $LOCK_FILE for $(cat "$LOCK_FILE")"
    rm -- "$LOCK_FILE"
  fi
}

# Return to the directory the script was started from, log the outcome and
# terminate the script with $RESULT as exit status (0 when RESULT is unset
# or empty).
# Globals: CURDIR (read), RESULT (read)
exit_cleanly() {
  # Resolve the status once so the log line and the real exit code agree
  # even when no caller has set RESULT.
  exit_status=${RESULT:-0}
  cd "$CURDIR"
  logger -t "monit-sidekiq[$$]" "exiting wrapper cleanly with $exit_status"
  exit "$exit_status"
}

# Release the monit lock file first, then leave the script through
# exit_cleanly.
unlock_and_exit_cleanly() {
  rm_lockfile
  exit_cleanly
}

# Load the contents of $PID_FILE into the exported variable PID.
# Globals: PID_FILE (read), PID (written, exported)
set_pid_from_file() {
  # Quoted so a path containing whitespace is read as one file and an empty
  # PID_FILE produces an error from cat instead of a read from stdin.
  PID=$(cat "$PID_FILE")
  export PID
}

# Send signal $SIG to the process recorded in $PID_FILE. When the pid file
# does not exist nothing is signalled.
# Globals: PID_FILE, SIG (read); RESULT and SLEEP_COUNT (reset to 0);
#          PID (set through set_pid_from_file)
signal_worker() {
  RESULT=0
  if [ -f "$PID_FILE" ]; then
    set_pid_from_file
    logger -t "monit-sidekiq[$$]" "Issuing kill with -$SIG $PID"
    # Reset the tick counter that signal_worker_fatally uses while waiting.
    SLEEP_COUNT=0
    kill "-$SIG" "$PID"
  fi
}

# Signal the worker via signal_worker, optionally wait for it to exit, and
# fall back to SIGKILL for the worker (and its first child) if it is still
# alive. Finally remove the pid file once the process is gone.
# Globals: PID_FILE, PID, ALLOW_TIMEOUT, GRACE_TIME, WORKER_REF (read);
#          SLEEP_COUNT, REPORT_TIME, RUNTIME (written)
signal_worker_fatally() {
  # Plain call. Writing `signal_worker()` here would, together with the
  # following `if ... fi`, be parsed as a function *definition* and nothing
  # below would run when this function is invoked.
  signal_worker
  if [ -f "$PID_FILE" ]; then
    if [ -z "$PID" ]; then
      # With an empty PID, /proc/$PID is /proc itself, which always exists,
      # so the wait and kill loops below would never terminate.
      logger -t "monit-sidekiq[$$]" "No pid recorded in $PID_FILE for $WORKER_REF, removing it"
      rm -f -- "$PID_FILE"
      return 0
    fi
    if [ -n "$ALLOW_TIMEOUT" ]; then
      # NOTE(review): SLEEP_COUNT counts quarter-second ticks but is compared
      # directly with GRACE_TIME, so TERM is first sent after GRACE_TIME/4
      # seconds - confirm whether GRACE_TIME is meant to be in seconds.
      while [ -e "/proc/$PID" ]; do
        sleep .25
        SLEEP_COUNT=$((SLEEP_COUNT + 1))
        REPORT_TIME=$((SLEEP_COUNT % 4))
        if [ "$SLEEP_COUNT" -gt "${GRACE_TIME:-60}" ]; then
          logger -t "monit-sidekiq[$$]" "Sidekiq worker with pid $PID for $WORKER_REF still running, issuing -TERM"
          kill -15 "$PID" 2>/dev/null; true
        elif [ "$REPORT_TIME" -eq 0 ]; then
          # Report progress once per full second of waiting.
          RUNTIME=$((SLEEP_COUNT / 4))
          logger -t "monit-sidekiq[$$]" "waiting for $PID to die ( for $RUNTIME seconds now)"
        fi
      done
    fi
    sleep 1
    if [ -d "/proc/$PID" ]; then
      # NOTE(review): the `break` means only the first child found is killed;
      # confirm that a worker never has more than one child to clean up.
      for child in $(ps axo pid,ppid | awk -v parent="$PID" '$2 == parent { print $1 }'); do
        kill -9 "$child" 2>/dev/null; true
        logger -t "monit-sidekiq[$$]" "Murdering Sidekiq workers child with $child for $WORKER_REF"
        break
      done
      while [ -d "/proc/$PID" ]; do
        logger -t "monit-sidekiq[$$]" "Murdering Sidekiq worker with $PID for $WORKER_REF"
        kill -9 "$PID"
        sleep 1
      done
    fi
    logger -t "monit-sidekiq[$$]" "Removing pid file for $PID - $WORKER_REF"
    [ -e "$PID_FILE" ] && [ ! -d "/proc/$PID" ] && rm -f -- "$PID_FILE"
  fi
}

# Take the per-worker monit lock by writing this script's pid to $LOCK_FILE.
# An existing lock whose recorded pid is no longer in the process table is
# treated as stale and replaced; otherwise RESULT is set to 1 and the script
# leaves through exit_cleanly.
# Globals: LOCK_FILE, WORKER_REF (read); RESULT, LAST_LOCK_PID (written)
lock() {
  RESULT=0
  if [ -e "$LOCK_FILE" ]; then
    LAST_LOCK_PID=$(cat "$LOCK_FILE")
    # Compare the pid column exactly: a substring match (grep) would regard
    # a stale lock for pid 12 as live whenever e.g. pid 123 is running.
    if [ -n "$LAST_LOCK_PID" ] \
      && ! ps axo pid | awk -v pid="$LAST_LOCK_PID" '$1 == pid { seen = 1 } END { exit !seen }' \
      && [ -f "$LOCK_FILE" ]; then
      sleep 1
      logger -t "monit-sidekiq[$$]" "Removing stale lock file for $WORKER_REF ($LAST_LOCK_PID)"
      rm -- "$LOCK_FILE" 2>&1
    else
      logger -t "monit-sidekiq[$$]" "Monit already messing with $WORKER_REF ($LAST_LOCK_PID)"
      RESULT=1
      exit_cleanly
    fi
  fi
  echo $$ > "$LOCK_FILE"
}

# In the transition from 0.18.2 to 0.18.3 of the ey monit scripts the way
# the pid file is used to find the process to kill changed. To avoid leaving
# old-style workers behind after an upgrade of this package, detect a pid
# file that points at a `su -c` wrapper process, kill that process and its
# children, remove the pid file and leave the script.
# Globals: PID_FILE (read); PID (set through set_pid_from_file);
#          RESULT (set to 0 before exiting)
legacy_fix() {
  if [ -f "$PID_FILE" ]; then
    set_pid_from_file
    # Match the pid column exactly so an unrelated process whose pid or
    # command line merely contains the digits of $PID is not mistaken for
    # the legacy wrapper.
    if [ -n "$(ps axo pid,command | awk -v pid="$PID" '$1 == pid && /su -c/')" ]; then
      logger -t "monit-sidekiq[$$]" "Monit Scripts have just been upgraded, killing old style workers"
      for child in $(ps axo pid,ppid | awk -v parent="$PID" '$2 == parent { print $1 }'); do
        # Ask politely first, then SIGKILL once a second until it is gone.
        kill -TERM "$child" 2>/dev/null
        while [ -e "/proc/$child" ]; do
          logger -t "monit-sidekiq[$$]" "killing legacy worker: $child"
          [ -e "/proc/$child" ] && kill -9 "$child" 2>/dev/null
          sleep 1
        done
      done
      [ -e "/proc/$PID" ] && kill -9 "$PID" 2>/dev/null
      rm -- "$PID_FILE"
      # The previous name used here (unlock_exit_cleanly) is not defined
      # anywhere, so the script carried on instead of exiting.
      RESULT=0
      unlock_and_exit_cleanly
    fi
  fi
}

# All four positional arguments (<appname> <action> <environment>
# <conf_file>) are required.
if [ $# -lt 4 ]; then usage; fi

# The wrapper refuses to run as anyone but root.
if [ "$(whoami)" != "root" ]; then
  # $0 is quoted so a script path containing whitespace still yields a
  # single tag argument for logger.
  logger -t "$(basename "$0")" -s "Must be run as root"
  exit 1
fi

# Basic setup of default values
APP=$1
ACTION=$2
RACK_ENV=$3
CONF_FILE=$4

APP_DIR="/data/${APP}"
APP_ROOT="${APP_DIR}/current"
APP_SHARED="${APP_DIR}/shared"
APP_CONFIG="${APP_SHARED}/config"

# Refuse to go on without the worker's config file.
if [ -e "${APP_CONFIG}/${CONF_FILE}" ]; then
  logger -t "sidekiq_${APP}" -s "Good, found a config file. Proceeding..."
else
  logger -t "sidekiq_${APP}" -s "/data/${APP}/shared/config/${CONF_FILE} not found for app: ${APP}"
  exit 1
fi

# Worker name = config file name without a trailing ".yml.conf". Done with
# a literal suffix strip: the former `sed s/.yml.conf//` treated each "." as
# "any character" and removed the first match anywhere in the name.
WORKER_REF=${CONF_FILE%.yml.conf}
LOG_FILE="$APP_ROOT/log/$WORKER_REF.log"
LOCK_FILE="/tmp/$WORKER_REF.monit-lock"
PID_FILE="/var/run/engineyard/sidekiq/$APP/$WORKER_REF.pid"
GEMFILE="$APP_ROOT/Gemfile"
# Use the bundled binstub when the application ships a Gemfile, otherwise
# fall back to whatever `sidekiq` is on PATH.
SIDEKIQ="sidekiq"
if [ -f "$GEMFILE" ]; then
  SIDEKIQ="bundle exec $APP_ROOT/ey_bundler_binstubs/sidekiq"
fi

# Main dispatch: build the worker command line and perform the requested
# action (start, stop or quit) under the per-worker monit lock.
if [ -d "$APP_ROOT" ]; then
  # The worker is run as the owner of the application root (stat -L follows
  # a symlink to the directory it points at).
  USER=$(stat -L -c"%U" "$APP_ROOT")
  export HOME="/home/$USER"

  # Fix for SD-3786 - stop sending in VERBOSE= and VVERBOSE= by default.
  # ${VAR+set} is non-empty exactly when VAR is set (even to an empty value).
  if [ -n "${VERBOSE+set}" ]; then export V="VERBOSE=$VERBOSE"; fi
  if [ -n "${VVERBOSE+set}" ]; then export VV="VVERBOSE=$VVERBOSE"; fi

  # Older versions of sudo need us to call env for the env vars to be set correctly
  COMMAND="/usr/bin/env $V $VV APP_ROOT=${APP_ROOT} RACK_ENV=${RACK_ENV} RAILS_ENV=${RACK_ENV} $SIDEKIQ -e ${RACK_ENV} -C ${APP_CONFIG}/${CONF_FILE}"

  # Make sure the directory that holds the pid file exists.
  mkdir -p "/var/run/engineyard/sidekiq/$APP"

  # handle the second param, don't start if already existing

  logger -t "monit-sidekiq[$$]" "${ACTION}ing Sidekiq worker $WORKER_REF"
  case "$ACTION" in
    start)
      lock
      cd "$APP_ROOT"
      if [ -f "$PID_FILE" ]; then
        set_pid_from_file
        # Require a non-empty PID: with an empty pid file /proc/$PID would be
        # /proc itself and the worker would be reported as running forever.
        if [ -n "$PID" ] && [ -d "/proc/$PID" ]; then
          logger -t "monit-sidekiq[$$]" "Sidekiq worker $WORKER_REF is already running with $PID."
          RESULT=1
        else
          rm -f -- "$PID_FILE"
          logger -t "monit-sidekiq[$$]" "Removing stale pid file ($PID_FILE) for pid $PID"
        fi
      fi
      if [ ! -f "$PID_FILE" ]; then
        # $COMMAND is deliberately unquoted: it is a flat string that must be
        # word-split into the argument list for sudo.
        sudo -u "$USER" -H $COMMAND >> "$LOG_FILE" 2>&1 &
        # For a backgrounded command $? only says the job was launched; it
        # is not the worker's own exit status.
        RESULT=$?
        logger -t "monit-sidekiq[$$]" "Started with pid $! and exit $RESULT"
        echo $! > "$PID_FILE"
        sleep .1
      fi
      unlock_and_exit_cleanly
      ;;
    stop|quit)
      legacy_fix
      lock
      SIG="INT"
      # NOTE(review): GRACE_TIME and ALLOW_TIMEOUT are only read by
      # signal_worker_fatally, which is not called here; signal_worker just
      # sends the signal and leaves the pid file in place. Confirm which of
      # the two is intended for stop.
      [ -z "$GRACE_TIME" ] && GRACE_TIME=60
      ALLOW_TIMEOUT=1
      signal_worker
      [ -e "$LOCK_FILE" ] && rm -- "$LOCK_FILE"
      unlock_and_exit_cleanly
      ;;
    *)
      usage
      # Not reached while usage exits; kept as a safety net.
      exit_cleanly
      ;;
  esac
else
  echo "/data/$APP/current doesn't exist."
  usage
fi