# Bail out before loading anything unless this flag is set.
#
# We are doing this to guard against bundler autoloading because there is
# no value in loading god in most processes.
if $load_god

# core
require 'stringio'
require 'fileutils'

begin
  require 'fastthread'
rescue LoadError
ensure
  require 'thread'
end

# stdlib

# internal requires
require 'god/errors'
require 'god/simple_logger'
require 'god/logger'
require 'god/sugar'

require 'god/system/process'
require 'god/system/portable_poller'
require 'god/system/slash_proc_poller'

require 'god/timeline'
require 'god/configurable'

require 'god/task'

require 'god/behavior'
require 'god/behaviors/clean_pid_file'
require 'god/behaviors/clean_unix_socket'
require 'god/behaviors/notify_when_flapping'

require 'god/condition'
require 'god/conditions/process_running'
require 'god/conditions/process_exits'
require 'god/conditions/tries'
require 'god/conditions/memory_usage'
require 'god/conditions/cpu_usage'
require 'god/conditions/always'
require 'god/conditions/lambda'
require 'god/conditions/degrading_lambda'
require 'god/conditions/flapping'
require 'god/conditions/http_response_code'
require 'god/conditions/disk_usage'
require 'god/conditions/complex'
require 'god/conditions/file_mtime'
require 'god/conditions/file_touched'
require 'god/conditions/socket_responding'

require 'god/socket'
require 'god/driver'

require 'god/metric'
require 'god/watch'

require 'god/trigger'
require 'god/event_handler'
require 'god/registry'
require 'god/process'

require 'god/cli/version'
require 'god/cli/command'

# ruby 1.8 specific configuration
if RUBY_VERSION < '1.9'
  $KCODE = 'u'
end

CONTACT_DEPS = { }
CONTACT_LOAD_SUCCESS = { }

def load_contact(name)
  require "god/contacts/#{name}"
  CONTACT_LOAD_SUCCESS[name] = true
rescue LoadError
  CONTACT_LOAD_SUCCESS[name] = false
end

require 'god/contact'
load_contact(:campfire)
load_contact(:hipchat)
load_contact(:email)
load_contact(:jabber)
load_contact(:prowl)
load_contact(:scout)
load_contact(:statsd)
load_contact(:twitter)
load_contact(:webhook)
load_contact(:airbrake)
load_contact(:slack)
load_contact(:sensu)

$:.unshift File.join(File.dirname(__FILE__), *%w[.. ext god])

# App wide logging system
LOG = God::Logger.new

def applog(watch, level, text)
  LOG.log(watch, level, text)
end

# The $run global determines whether god should be started when the
# program would normally end. This should be set to true if when god
# should be started (e.g. `god -c <config file>`) and false otherwise
# (e.g. `god status`)
$run ||= nil

GOD_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))

# Return the binding of god's root level
def root_binding
  binding
end

module Kernel
  alias_method :abort_orig, :abort

  def abort(text = nil)
    $run = false
    applog(nil, :error, text) if text
    exit(1)
  end

  alias_method :exit_orig, :exit

  def exit(code = 0)
    $run = false
    exit_orig(code)
  end
end

class Module
  def safe_attr_accessor(*args)
    args.each do |arg|
      define_method((arg.to_s + "=").intern) do |other|
        if !self.running && self.inited
          abort "God.#{arg} must be set before any Tasks are defined"
        end

        if self.running && self.inited
          applog(nil, :warn, "God.#{arg} can't be set while god is running")
          return
        end

        instance_variable_set(('@' + arg.to_s).intern, other)
      end

      define_method(arg) do
        instance_variable_get(('@' + arg.to_s).intern)
      end
    end
  end
end

module God
  # The String version number for this package.
  VERSION = '0.13.6'

  # The Integer number of lines of backlog to keep for the logger.
  LOG_BUFFER_SIZE_DEFAULT = 100

  # An Array of directory paths to be used as the default PID file directory.
  # This list will be searched in order and the first one that has write
  # permissions will be used.
  PID_FILE_DIRECTORY_DEFAULTS = ['/var/run/god', '~/.god/pids']

  # The default Integer port number for the DRb communcations channel.
  DRB_PORT_DEFAULT = 17165

  # The default Array of String IPs that will allow DRb communication access.
  DRB_ALLOW_DEFAULT = ['127.0.0.1']

  # The default Symbol log level.
  LOG_LEVEL_DEFAULT = :info

  # The default Integer number of seconds to wait for god to terminate when
  # issued the quit command.
  TERMINATE_TIMEOUT_DEFAULT = 10

  # The default Integer number of seconds to wait for a process to terminate.
  STOP_TIMEOUT_DEFAULT = 10

  # The default String signal to send for the stop command.
  STOP_SIGNAL_DEFAULT = 'TERM'

  class << self
    # user configurable
    safe_attr_accessor :pid,
                       :host,
                       :port,
                       :allow,
                       :log_buffer_size,
                       :pid_file_directory,
                       :log_file,
                       :log_level,
                       :use_events,
                       :terminate_timeout,
                       :socket_user,
                       :socket_group,
                       :socket_perms

    # internal
    attr_accessor :inited,
                  :running,
                  :pending_watches,
                  :pending_watch_states,
                  :server,
                  :watches,
                  :groups,
                  :contacts,
                  :contact_groups,
                  :main
  end

  # Initialize class instance variables.
  self.pid = nil
  self.host = nil
  self.port = nil
  self.allow = nil
  self.log_buffer_size = nil
  self.pid_file_directory = nil
  self.log_level = nil
  self.terminate_timeout = nil
  self.socket_user = nil
  self.socket_group = nil
  self.socket_perms = 0755

  # Initialize internal data.
  #
  # Returns nothing.
  def self.internal_init
    # Only do this once.
    return if self.inited

    # Variable init.
    self.watches = {}
    self.groups = {}
    self.pending_watches = []
    self.pending_watch_states = {}
    self.contacts = {}
    self.contact_groups = {}

    # Set defaults.
    self.log_buffer_size ||= LOG_BUFFER_SIZE_DEFAULT
    self.port ||= DRB_PORT_DEFAULT
    self.allow ||= DRB_ALLOW_DEFAULT
    self.log_level ||= LOG_LEVEL_DEFAULT
    self.terminate_timeout ||= TERMINATE_TIMEOUT_DEFAULT

    # Additional setup.
    self.setup

    # Log level.
    log_level_map = {:debug => Logger::DEBUG,
                     :info => Logger::INFO,
                     :warn => Logger::WARN,
                     :error => Logger::ERROR,
                     :fatal => Logger::FATAL}
    LOG.level = log_level_map[self.log_level]

    # Init has been executed.
    self.inited = true

    # Not yet running.
    self.running = false
  end

  # Instantiate a new, empty Watch object and pass it to the mandatory block.
  # The attributes of the watch will be set by the configuration file. Aborts
  # on duplicate watch name, invalid watch, or conflicting group name.
  #
  # Returns nothing.
  def self.watch(&block)
    self.task(Watch, &block)
  end

  # Instantiate a new, empty Task object and yield it to the mandatory block.
  # The attributes of the task will be set by the configuration file. Aborts
  # on duplicate task name, invalid task, or conflicting group name.
  #
  # Returns nothing.
  def self.task(klass = Task)
    # Ensure internal init has run.
    self.internal_init

    t = klass.new
    yield(t)

    # Do the post-configuration.
    t.prepare

    # If running, completely remove the watch (if necessary) to prepare for
    # the reload
    existing_watch = self.watches[t.name]
    if self.running && existing_watch
      self.pending_watch_states[existing_watch.name] = existing_watch.state
      self.unwatch(existing_watch)
    end

    # Ensure the new watch has a unique name.
    if self.watches[t.name] || self.groups[t.name]
      abort "Task name '#{t.name}' already used for a Task or Group"
    end

    # Ensure watch is internally valid.
    t.valid? || abort("Task '#{t.name}' is not valid (see above)")

    # Add to list of watches.
    self.watches[t.name] = t

    # Add to pending watches.
    self.pending_watches << t

    # Add to group if specified.
    if t.group
      # Ensure group name hasn't been used for a watch already.
      if self.watches[t.group]
        abort "Group name '#{t.group}' already used for a Task"
      end

      self.groups[t.group] ||= []
      self.groups[t.group] << t
    end

    # Register watch.
    t.register!

    # Log.
    if self.running && existing_watch
      applog(t, :info, "#{t.name} Reloaded config")
    elsif self.running
      applog(t, :info, "#{t.name} Loaded config")
    end
  end

  # Unmonitor and remove the given watch from god.
  #
  # watch - The Watch to remove.
  #
  # Returns nothing.
  def self.unwatch(watch)
    # Unmonitor.
    watch.unmonitor unless watch.state == :unmonitored

    # Unregister.
    watch.unregister!

    # Remove from watches.
    self.watches.delete(watch.name)

    # Remove from groups.
    if watch.group
      self.groups[watch.group].delete(watch)
    end

    applog(watch, :info, "#{watch.name} unwatched")
  end

  # Instantiate a new Contact of the given kind and send it to the block.
  # Then prepare, validate, and record the Contact. Aborts on invalid kind,
  # duplicate contact name, invalid contact, or conflicting group name.
  #
  # kind - The Symbol contact class specifier.
  #
  # Returns nothing.
  def self.contact(kind)
    # Ensure internal init has run.
    self.internal_init

    # Verify contact has been loaded.
    if CONTACT_LOAD_SUCCESS[kind] == false
      applog(nil, :error, "A required dependency for the #{kind} contact is unavailable.")
      applog(nil, :error, "Run the following commands to install the dependencies:")
      CONTACT_DEPS[kind].each do |d|
        applog(nil, :error, "  [sudo] gem install #{d}")
      end
      abort
    end

    # Create the contact.
    begin
      c = Contact.generate(kind)
    rescue NoSuchContactError => e
      abort e.message
    end

    # Send to block so config can set attributes.
    yield(c) if block_given?

    # Call prepare on the contact.
    c.prepare

    # Remove existing contacts of same name.
    existing_contact = self.contacts[c.name]
    if self.running && existing_contact
      self.uncontact(existing_contact)
    end

    # Warn and noop if the contact has been defined before.
    if self.contacts[c.name] || self.contact_groups[c.name]
      applog(nil, :warn, "Contact name '#{c.name}' already used for a Contact or Contact Group")
      return
    end

    # Abort if the Contact is invalid, the Contact will have printed out its
    # own error messages by now.
    unless Contact.valid?(c) && c.valid?
      abort "Exiting on invalid contact"
    end

    # Add to list of contacts.
    self.contacts[c.name] = c

    # Add to contact group if specified.
    if c.group
      # Ensure group name hasn't been used for a contact already.
      if self.contacts[c.group]
        abort "Contact Group name '#{c.group}' already used for a Contact"
      end

      self.contact_groups[c.group] ||= []
      self.contact_groups[c.group] << c
    end
  end

  # Remove the given contact from god.
  #
  # contact - The Contact to remove.
  #
  # Returns nothing.
  def self.uncontact(contact)
    self.contacts.delete(contact.name)
    if contact.group
      self.contact_groups[contact.group].delete(contact)
    end
  end

  def self.watches_by_name(name)
    case name 
      when "", nil then self.watches.values.dup
      else Array(self.watches[name] || self.groups[name]).dup
    end
  end

  # Control the lifecycle of the given task(s).
  #
  # name    - The String name of a task/group. If empty, invokes command for all tasks.
  # command - The String command to run. Valid commands are:
  #           "start", "monitor", "restart", "stop", "unmonitor", "remove".
  #
  # Returns an Array of String task names affected by the command.
  def self.control(name, command)
    # Get the list of items.
    items = self.watches_by_name(name)

    jobs = []

    # Do the command.
    case command
      when "start", "monitor"
        items.each { |w| jobs << Thread.new { w.monitor if w.state != :up } }
      when "restart"
        items.each { |w| jobs << Thread.new { w.move(:restart) } }
      when "stop"
        items.each { |w| jobs << Thread.new { w.action(:stop); w.unmonitor if w.state != :unmonitored } }
      when "unmonitor"
        items.each { |w| jobs << Thread.new { w.unmonitor if w.state != :unmonitored } }
      when "remove"
        items.each { |w| self.unwatch(w) }
      else
        raise InvalidCommandError.new
    end

    jobs.each { |j| j.join }

    items.map { |x| x.name }
  end

  # Unmonitor and stop all tasks.
  #
  # Returns true on success, false if all tasks could not be stopped within 10
  # seconds
  def self.stop_all
    self.watches.sort.each do |name, w|
      Thread.new do
        w.unmonitor if w.state != :unmonitored
        w.action(:stop) if w.alive?
      end
    end

    terminate_timeout.times do
      return true unless self.watches.map { |name, w| w.alive? }.any?
      sleep 1
    end

    return false
  end

  # Force the termination of god.
  # * Clean up pid file if one exists
  # * Stop DRb service
  # * Hard exit using exit!
  #
  # Never returns because the process will no longer exist!
  def self.terminate
    FileUtils.rm_f(self.pid) if self.pid
    self.server.stop if self.server
    exit!(0)
  end

  # Gather the status of each task.
  #
  # Examples
  #
  #   God.status
  #   # => { 'mongrel' => :up, 'nginx' => :up }
  #
  # Returns a Hash where the key is the String task name and the value is the
  #   Symbol status.
  def self.status
    info = {}
    self.watches.map do |name, w|
      info[name] = {:state => w.state, :group => w.group}
    end
    info
  end

  # Send a signal to each task.
  #
  # name   - The String name of the task or group.
  # signal - The String or integer signal to send. e.g. 'HUP', 9.
  #
  # Returns an Array of String names of the tasks affected.
  def self.signal(name, signal)
    items = watches_by_name(name)
    jobs = []
    items.each { |w| jobs << Thread.new { w.signal(signal) } }
    jobs.each { |j| j.join }
    items.map { |x| x.name }
  end

  # Log lines for the given task since the specified time.
  #
  # watch_name - The String name of the task (may be abbreviated).
  # since      - The Time since which to report log lines.
  #
  # Raises God::NoSuchWatchError if no tasks matched.
  # Returns the String of newline separated log lines.
  def self.running_log(watch_name, since)
    matches = pattern_match(watch_name, self.watches.keys)

    unless matches.first
      raise NoSuchWatchError.new
    end

    LOG.watch_log_since(matches.first, since)
  end

  # Load a config file into a running god instance. Rescues any exceptions
  # that the config may raise and reports these back to the caller.
  #
  # code     - The String config file contents.
  # filename - The filename of the config file.
  # action   - The optional String command specifying how to deal with
  #            existing watches. Valid options are: 'stop', 'remove' or
  #            'leave' (default).
  #
  # Returns a three-tuple Array [loaded_names, errors, unloaded_names] where:
  #         loaded_names   - The Array of String task names that were loaded.
  #         errors         - The String of error messages produced during the
  #                          load phase. Will be a blank String if no errors
  #                          were encountered.
  #         unloaded_names - The Array of String task names that were unloaded
  #                          from the system (if 'remove' or 'stop' was
  #                          specified as the action).
  def self.running_load(code, filename, action = nil)
    errors = ""
    loaded_watches = []
    unloaded_watches = []
    jobs = []

    begin
      LOG.start_capture

      Gem.clear_paths
      eval(code, root_binding, filename)
      self.pending_watches.each do |w|
        if previous_state = self.pending_watch_states[w.name]
          w.monitor unless previous_state == :unmonitored
        else
          w.monitor if w.autostart?
        end
      end
      loaded_watches = self.pending_watches.map { |w| w.name }
      self.pending_watches.clear
      self.pending_watch_states.clear

      self.watches.each do |name, watch|
        next if loaded_watches.include?(name)

        case action
        when 'stop'
          jobs << Thread.new(watch) { |w| w.action(:stop); self.unwatch(w) }
          unloaded_watches << name
        when 'remove'
          jobs << Thread.new(watch) { |w| self.unwatch(w) }
          unloaded_watches << name
        when 'leave', '', nil
          # Do nothing
        else
          raise InvalidCommandError, "Unknown action: #{action}"
        end
      end

      # Make sure we quit capturing when we're done.
      LOG.finish_capture
    rescue Exception => e
      # Don't ever let running_load take down god.
      errors << LOG.finish_capture

      unless e.instance_of?(SystemExit)
        errors << e.message << "\n"
        errors << e.backtrace.join("\n")
      end
    end

    jobs.each { |t| t.join }

    [loaded_watches, errors, unloaded_watches]
  end

  # Load the given file(s) according to the given glob.
  #
  # glob - The glob-enabled String path to load.
  #
  # Returns nothing.
  def self.load(glob)
    Dir[glob].each do |f|
      Kernel.load f
    end
  end

  # Setup pid file directory and log system.
  #
  # Returns nothing.
  def self.setup
    if self.pid_file_directory
      # Pid file dir was specified, ensure it is created and writable.
      unless File.exist?(self.pid_file_directory)
        begin
          FileUtils.mkdir_p(self.pid_file_directory)
        rescue Errno::EACCES => e
          abort "Failed to create pid file directory: #{e.message}"
        end
      end

      unless File.writable?(self.pid_file_directory)
        abort "The pid file directory (#{self.pid_file_directory}) is not writable by #{Etc.getlogin}"
      end
    else
      # No pid file dir specified, try defaults.
      PID_FILE_DIRECTORY_DEFAULTS.each do |idir|
        dir = File.expand_path(idir)
        begin
          FileUtils.mkdir_p(dir)
          if File.writable?(dir)
            self.pid_file_directory = dir
            break
          end
        rescue Errno::EACCES => e
        end
      end

      unless self.pid_file_directory
        dirs = PID_FILE_DIRECTORY_DEFAULTS.map { |x| File.expand_path(x) }
        abort "No pid file directory exists, could be created, or is writable at any of #{dirs.join(', ')}"
      end
    end

    if God::Logger.syslog
      LOG.info("Syslog enabled.")
    else
      LOG.info("Syslog disabled.")
    end

    applog(nil, :info, "Using pid file directory: #{self.pid_file_directory}")
  end

  # Initialize and startup the machinery that makes god work.
  #
  # Returns nothing.
  def self.start
    self.internal_init

    # Instantiate server.
    self.server = Socket.new(self.port, self.socket_user, self.socket_group, self.socket_perms)

    # Start monitoring any watches set to autostart.
    self.watches.values.each { |w| w.monitor if w.autostart? }

    # Clear pending watches.
    self.pending_watches.clear

    # Mark as running.
    self.running = true

    # Don't exit.
    self.main =
    Thread.new do
      loop do
        sleep 60
      end
    end
  end

  # Prevent god from exiting.
  #
  # Returns nothing.
  def self.join
    self.main.join if self.main
  end

  # Returns the version String.
  def self.version
    God::VERSION
  end

  # To be called on program exit to start god.
  #
  # Returns nothing.
  def self.at_exit
    self.start
    self.join
  end

  # private

  # Match a shortened pattern against a list of String candidates.
  # The pattern is expanded into a regular expression by
  # inserting .* between each character.
  #
  # pattern - The String containing the abbreviation.
  # list    - The Array of Strings to match against.
  #
  # Examples
  #
  #   list = %w{ foo bar bars }
  #   pattern = 'br'
  #   God.pattern_match(list, pattern)
  #   # => ['bar', 'bars']
  #
  # Returns the Array of matching name Strings.
  def self.pattern_match(pattern, list)
    regex = pattern.split('').join('.*')

    list.select do |item|
      item =~ Regexp.new(regex)
    end.sort_by { |x| x.size }
  end
end

# Runs immediately before the program exits. If $run is true,
# start god, if $run is false, exit normally.
#
# Returns nothing.
at_exit do
  God.at_exit if $run
end

end