#!/usr/bin/env ruby

require 'slop'


# When this script is run straight from a source checkout (rather than from an
# installed gem), rubygems has not put our lib/ directory on the load path,
# so add it here -- but only if it is not already present.
self_load_path = File.expand_path("../../lib", __FILE__)
$LOAD_PATH << self_load_path unless $LOAD_PATH.include?(self_load_path)

require 'traject'
require 'traject/indexer'


# Keep an untouched copy of the command line so it can be logged later;
# `opts.parse!` below works on ARGV itself, and whatever is left in ARGV
# afterwards is treated as the input file.
orig_argv = ARGV.dup


# Command-line option declarations. With strict mode on, unknown options are
# reported as a Slop::Error from parse! instead of being passed through.
opts = Slop.new(strict: true) do
  banner "traject [options] -c configuration.rb [-c config2.rb] file.mrc"

  on 'v', 'version', "print version information to stderr"
  on 'd', 'debug', "Include debug log, -s log.level=debug"
  on 'h', 'help', "print usage information to stderr"
  on 'c', 'conf', 'configuration file path (repeatable)', argument: true, as: Array
  on :s, :setting, "settings: `-s key=value` (repeatable)", argument: true, as: Array
  on :r, :reader, "Set reader class, shortcut for `-s reader_class_name=*`", argument: true
  on :w, :writer, "Set writer class, shortcut for `-s writer_class_name=*`", argument: true
  on :u, :solr, "Set solr url, shortcut for `-s solr.url=*`", argument: true
  on :j, "output as pretty printed json, shortcut for `-s writer_class_name=JsonWriter -s json_writer.pretty_print=true`"
  on :t, :marc_type, "xml, json or binary. shortcut for `-s marc_source.type=*`", argument: true
  on :I, "load_path", "append paths to ruby $LOAD_PATH", argument: true, as: Array, delimiter: ":"
  on :g, "gemfile", "run with bundler and optionally specified Gemfile", argument: :optional, default: ""
end

# Parse the command line; on any option error, report it together with the
# usage text and exit with status 1.
begin
  opts.parse!
rescue Slop::Error => parse_error
  $stderr.puts "Error: #{parse_error.message}", "Exiting...", "", opts.help
  exit 1
end


# Plain Hash view of the parsed options, read below with symbol keys
# (e.g. options[:version], options[:j]).
options = opts.to_hash


# --version and --help are successful, informational requests: print the
# requested text to stderr (as the option descriptions promise) and exit
# with status 0, so callers and shell scripts do not see them as failures.
if options[:version]
  $stderr.puts "traject version #{Traject::VERSION}"
  exit 0
end

if options[:help]
  $stderr.puts opts.help
  exit 0
end

# Optional bundler integration: `-g` / `--gemfile [path]`.
#
# We have to ask the Slop object itself whether -g was given at all: the
# option is declared with a default of "", so the options hash alone cannot
# tell "no -g" apart from "-g without an argument".
if opts.present? :gemfile
  gemfile = options[:gemfile].to_s

  # An empty string is truthy in Ruby, so emptiness must be tested explicitly.
  # With no path given (value is "" or nil) we set nothing and let bundler
  # locate the Gemfile on its own.
  unless gemfile.empty?
    # tell bundler what gemfile to use
    gem_path = File.expand_path(gemfile)
    # bundler not good at error reporting, we check ourselves
    # (File.exist? rather than File.exists?, which was removed in Ruby 3.2)
    unless File.exist? gem_path
      $stderr.puts "Gemfile `#{gemfile}` does not exist, exiting..."
      $stderr.puts
      $stderr.puts opts.help
      exit 2
    end

    ENV["BUNDLE_GEMFILE"] = gem_path
  end
  require 'bundler/setup'
end

# Collect the repeatable `-s key=value` arguments into the settings hash.
#
# Only the FIRST `=` separates key from value, so a value may itself contain
# `=` (e.g. `-s solr.url=http://host/solr?wt=json`). The key must be
# non-empty; the value may be empty. A malformed pair is reported on stderr
# and the script exits with status 3.
settings = {}
(options[:setting] || []).each do |setting_pair|
  # /m lets the value span any characters, \z anchors at the true end of string
  pair_match = /\A([^=]+)=(.*)\z/m.match(setting_pair)

  if pair_match
    settings[pair_match[1]] = pair_match[2]
  else
    $stderr.puts "Unrecognized setting argument '#{setting_pair}':"
    $stderr.puts "Should be of format -s key=value"
    exit 3
  end
end


# Command-line shortcuts. Each one is sugar for one or more `-s key=value`
# settings; they are applied after the -s pairs, so they overwrite any value
# already stored under the same key.
settings["log.level"] = "debug" if options[:debug]

# Shortcuts whose argument is copied verbatim into a single setting.
[
  [:writer, "writer_class_name"],
  [:reader, "reader_class_name"],
  [:solr,   "solr.url"]
].each do |flag, setting_key|
  flag_value = options[flag]
  settings[setting_key] = flag_value if flag_value
end

# -j is applied after -w, so it overrides a writer class given there.
if options[:j]
  settings.update(
    "writer_class_name"        => "JsonWriter",
    "json_writer.pretty_print" => "true"
  )
end

settings["marc_source.type"] = options[:marc_type] if options[:marc_type]


# Directories given with -I are appended to the load path, skipping any
# that are already on it.
extra_load_paths = options[:load_path] || []
extra_load_paths.each do |dir|
  $LOAD_PATH.push(dir) unless $LOAD_PATH.include?(dir)
end

# Build the indexer and hand it the settings gathered from the command line.
indexer = Traject::Indexer.new
indexer.settings( settings )

# At least one -c configuration file is mandatory.
unless options[:conf] && options[:conf].length > 0
  $stderr.puts "Error: Missing required configuration file"
  $stderr.puts "Exiting..."
  $stderr.puts
  $stderr.puts opts.help
  exit 2
end

# Evaluate each configuration file, in the order given, in the context of the
# indexer. Exit status 2 means the file could not be found; 3 means it was
# read but raised an error while being evaluated.
options[:conf].each do |conf_path|
  # Read first, separately from evaluation, so that an Errno::ENOENT raised
  # by code INSIDE the config file is not mistaken for a missing config file.
  # File.read also closes the file handle, unlike File.open(...).read.
  begin
    conf_source = File.read(conf_path)
  rescue Errno::ENOENT
    $stderr.puts "Could not find configuration file '#{conf_path}', exiting..."
    exit 2
  end

  begin
    indexer.instance_eval(conf_source, conf_path)
  rescue StandardError, ScriptError => e
    # ScriptError covers SyntaxError/LoadError from the evaluated file.
    # Deliberately not `rescue Exception`: SystemExit, Interrupt and similar
    # must propagate instead of being reported as a parse failure.
    $stderr.puts "Could not parse configuration file '#{conf_path}'"
    $stderr.puts "  #{e.message}"
    # Show just the "file:line" part of the innermost backtrace frame, if any.
    location = /\A(.*)\:in/.match(Array(e.backtrace).first.to_s)
    $stderr.puts "  #{location[1]}" if location
    exit 3
  end
end

## SAFE TO LOG STARTING HERE.
#
#  Shouldn't log before the config files are read above, because
#  the config files set up the logger
##############
# Record the command line exactly as it was given, before option parsing.
indexer.logger.info("executing with arguments: `#{orig_argv.join(' ')}`")


# ARGF might be perfect for this, but problems with it include:
# * jruby is broken, no way to set its encoding, leads to encoding errors reading non-ascii
#   https://github.com/jruby/jruby/issues/891
# * It's apparently not enough like an IO object for at least one of the ruby-marc XML
#   readers:
#   NoMethodError: undefined method `to_inputstream' for ARGF:Object
#      init at /Users/jrochkind/.gem/jruby/1.9.3/gems/marc-0.5.1/lib/marc/xml_parsers.rb:369
#
# * It INSISTS on reading from ARGV, making it hard to test, or use when you want to give
#   it a list of files on something other than ARGV.
#
# So for now we do just one file, or stdin if none given. Sorry!
if ARGV.length > 1
  $stderr.puts "Sorry, traject can only handle one input file at a time right now. `#{ARGV}` Exiting..."
  exit 1
end

if ARGV.empty?
  indexer.logger.info "Reading from STDIN..."
  io = $stdin
else
  indexer.logger.info "Reading from #{ARGV.first}"
  begin
    io = File.open(ARGV.first, 'r')
  rescue SystemCallError => e
    # Missing/unreadable input: report it like the config-file errors above
    # instead of dying with a raw backtrace. Exit status stays 1, which is
    # what the uncaught exception produced before.
    $stderr.puts "Could not open input file '#{ARGV.first}': #{e.message}"
    $stderr.puts "Exiting..."
    exit 1
  end
end

begin
  result = indexer.process(io)
ensure
  # Close the input file we opened ourselves; never close the shared $stdin.
  io.close unless io.equal?($stdin) || io.closed?
end

exit 1 unless result # non-zero exit status on process telling us there's problems.