#!/usr/bin/env ruby

# Command-line front end for traject.
#
# This part of the script:
#   1. makes sure the bundled `lib` directory is on $LOAD_PATH,
#   2. parses command-line options with Slop,
#   3. optionally activates Bundler (`-g`),
#   4. folds `-s key=value` pairs and the shortcut options into a settings hash,
#   5. creates a Traject::Indexer and evaluates each `-c` configuration file
#      in the context of that indexer.
#
# Exit statuses used here: 1 for option parse errors and for --version/--help,
# 2 for a missing Gemfile or missing configuration file, 3 for a malformed
# `-s` argument or a configuration file that fails to evaluate.

require 'slop'

# If we're loading from source instead of a gem, rubygems
# isn't setting load paths for us, so we need to set it ourselves
self_load_path = File.expand_path("../lib", File.dirname(__FILE__))
unless $LOAD_PATH.include? self_load_path
  $LOAD_PATH << self_load_path
end

require 'traject'
require 'traject/indexer'

# Keep an untouched copy: `opts.parse!` below is called on ARGV in place, and
# the original argument list is logged later in the script.
orig_argv = ARGV.dup

opts = Slop.new(:strict => true) do
  banner "traject [options] -c configuration.rb [-c config2.rb] file.mrc"

  on 'v', 'version', "print version information to stderr"
  on 'd', 'debug', "Include debug log, -s log.level=debug"
  on 'h', 'help', "print usage information to stderr"
  on 'c', 'conf', 'configuration file path (repeatable)', :argument => true, :as => Array
  on :s, :setting, "settings: `-s key=value` (repeatable)", :argument => true, :as => Array
  on :r, :reader, "Set reader class, shortcut for `-s reader_class_name=*`", :argument => true
  on :w, :writer, "Set writer class, shortcut for `-s writer_class_name=*`", :argument => true
  on :u, :solr, "Set solr url, shortcut for `-s solr.url=*`", :argument => true
  on :j, "output as pretty printed json, shortcut for `-s writer_class_name=JsonWriter -s json_writer.pretty_print=true`"
  on :t, :marc_type, "xml, json or binary. shortcut for `-s marc_source.type=*`", :argument => true
  on :I, "load_path", "append paths to ruby $LOAD_PATH", :argument => true, :as => Array, :delimiter => ":"
  on :g, "gemfile", "run with bundler and optionally specified Gemfile", :argument => :optional, :default => ""
end

begin
  opts.parse!
rescue Slop::Error => e
  $stderr.puts "Error: #{e.message}"
  $stderr.puts "Exiting..."
  $stderr.puts
  $stderr.puts opts.help
  exit 1
end

options = opts.to_hash

if options[:version]
  $stderr.puts "traject version #{Traject::VERSION}"
  exit 1
end

if options[:help]
  $stderr.puts opts.help
  exit 1
end

# have to use Slop object to tell diff between
# no arg supplied and no option -g given at all
if opts.present? :gemfile
  gemfile = options[:gemfile]

  # The option's declared default is "", and an empty string is truthy in
  # Ruby, so a blank value is explicitly treated as "no Gemfile path given".
  # NOTE(review): presumably a bare `-g` yields that "" default -- confirm
  # against the Slop version in use.
  if gemfile && !gemfile.empty?
    # tell bundler what gemfile to use
    gem_path = File.expand_path(gemfile)

    # bundler not good at error reporting, we check ourselves
    # (File.exist? rather than File.exists?, which newer Rubies no longer have)
    unless File.exist? gem_path
      $stderr.puts "Gemfile `#{options[:gemfile]}` does not exist, exiting..."
      $stderr.puts
      $stderr.puts opts.help
      exit 2
    end

    ENV["BUNDLE_GEMFILE"] = gem_path
  end

  require 'bundler/setup'
end

# Build the settings hash. Explicit `-s key=value` pairs go in first; the
# shortcut options below then overwrite any matching keys.
settings = {}
(options[:setting] || []).each do |setting_pair|
  # Split on the FIRST "=" only, so the value itself may contain "="
  # (for example a URL with a query string). /m lets the value span newlines.
  if setting_pair =~ /\A([^=]+)=(.*)\z/m
    key, value = $1, $2
    settings[key] = value
  else
    $stderr.puts "Unrecognized setting argument '#{setting_pair}':"
    $stderr.puts "Should be of format -s key=value"
    exit 3
  end
end

if options[:debug]
  settings["log.level"] = "debug"
end
if options[:writer]
  settings["writer_class_name"] = options[:writer]
end
if options[:reader]
  settings["reader_class_name"] = options[:reader]
end
if options[:solr]
  settings["solr.url"] = options[:solr]
end
if options[:j]
  settings["writer_class_name"] = "JsonWriter"
  settings["json_writer.pretty_print"] = "true"
end
if options[:marc_type]
  settings["marc_source.type"] = options[:marc_type]
end

(options[:load_path] || []).each do |path|
  $LOAD_PATH << path unless $LOAD_PATH.include? path
end

indexer = Traject::Indexer.new
indexer.settings( settings )

unless options[:conf] && options[:conf].length > 0
  $stderr.puts "Error: Missing required configuration file"
  $stderr.puts "Exiting..."
  $stderr.puts
  $stderr.puts opts.help
  exit 2
end

# Evaluate each configuration file with the indexer as `self`.
options[:conf].each do |conf_path|
  begin
    # File.read closes the file handle once the content has been read.
    indexer.instance_eval(File.read(conf_path), conf_path)
  rescue Errno::ENOENT
    $stderr.puts "Could not find configuration file '#{conf_path}', exiting..."
    exit 2
  rescue StandardError, ScriptError => e
    # ScriptError is listed so syntax errors in the configuration file are
    # reported here; SystemExit and Interrupt are deliberately not rescued,
    # so they are not misreported as parse failures.
    $stderr.puts "Could not parse configuration file '#{conf_path}'"
    $stderr.puts "  #{e.message}"
    # Show the file:line part of the first backtrace entry, when present.
    if e.backtrace.first =~ /\A(.*)\:in/
      $stderr.puts "  #{$1}"
    end
    exit 3
  end
end

## SAFE TO LOG STARTING HERE.
#
# Nothing before this point should log: the configuration files read above
# are what set up the logger.
##############
indexer.logger.info("executing with arguments: `#{orig_argv.join(' ')}`")

# ARGF might look perfect for reading the input, but it has problems:
# * jruby is broken, there is no way to set its encoding, which leads to
#   encoding errors when reading non-ascii
#   https://github.com/jruby/jruby/issues/891
# * It's apparently not enough like an IO object for at least one of the
#   ruby-marc XML readers:
#      NoMethodError: undefined method `to_inputstream' for ARGF:Object
#      init at /Users/jrochkind/.gem/jruby/1.9.3/gems/marc-0.5.1/lib/marc/xml_parsers.rb:369
# * It INSISTS on reading from ARGV, making it hard to test, or to use when
#   the list of files comes from somewhere other than ARGV.
#
# So for now we handle just one file, or stdin if none is given. Sorry!
if ARGV.length > 1
  $stderr.puts "Sorry, traject can only handle one input file at a time right now. `#{ARGV}` Exiting..."
  exit 1
end

# At most one argument remains here; no argument at all means read stdin.
input_path = ARGV.first

io =
  if input_path.nil?
    indexer.logger.info "Reading from STDIN..."
    $stdin
  else
    indexer.logger.info "Reading from #{input_path}"
    File.open(input_path, 'r')
  end

# A falsy result from process means there were problems, so exit non-zero.
exit 1 unless indexer.process(io)