lib/mabmapper/cli.rb in mabmapper-1.0.0.pre18 vs lib/mabmapper/cli.rb in mabmapper-2.0.0

- old
+ new

@@ -1,20 +1,21 @@ # # The command line interface class # require 'mabmapper/elasticsearch_writer' require 'mabmapper/tar_writer' +require 'metacrunch/ubpb/transformations/mab_to_primo' module Mabmapper class Cli ROOT_DIR = Dir.pwd def initialize @options = {} parse_command_line! - load_engine! + @transformation = Metacrunch::UBPB::Transformations::MabToPrimo.new process_files! end protected @@ -33,15 +34,10 @@ @options[:debug] = false opts.on( '-d', '--debug', "Debug mode on." ) do @options[:debug] = true end - @options[:debug_fields] = [] - opts.on( '-f', '--debug-fields a,b,c', Array, "If debug mode is on only fields matching the given names will be debugged." ) do |fields| - @options[:debug_fields] = fields - end - @options[:silent] = false opts.on( '-s', '--silent', "Do not output anything on the console" ) do @options[:silent] = true end @@ -73,26 +69,10 @@ puts e.message (puts optparse.help ; exit) end # - # Load normalization engine - # - def load_engine! - begin - engine_file = "mabmapper/aleph_mab_xml_engine" # TODO: Make me configurable - require engine_file - engine_class_name = "#{engine_file}".classify - @engine = engine_class_name.constantize.new - log "#{engine_class_name} loaded!" - rescue LoadError - log "Error loading engine #{engine_file}." - exit 1 - end - end - - # # Process the input files # def process_files! max_processes = @options[:no_of_procs] @@ -117,10 +97,32 @@ log "FINISHED" end private + def hash_to_xml(hash) + builder = Nokogiri::XML::Builder.new do |xml| + xml.document do + hash.each_pair do |_key, _values| + if _values.present? || _values == false + if _values.is_a?(Array) + #xml.send("#{field.name.downcase.pluralize}_") do + _values.each do |_value| + xml.send("#{_key.downcase}_", _value) + end + #end + else + xml.send("#{_key.downcase}_", _values) + end + end + end + end + end + + builder.to_xml + end + def process_file(file) case when file.end_with?('.tar') then process_tar_file(file) when file.end_with?('.tar.gz') then process_tar_gz_file(file) else process_default_file(file) @@ -135,17 +137,15 @@ tarReader = Gem::Package::TarReader.new(File.open(file, 'r')) tarReader.each do |entry| if entry.file? log "Processing file #{entry.full_name} from archive #{file}" - result = @engine.process(entry.full_name, entry.read, archive: file) + result = @transformation.call(entry.read.force_encoding("utf-8")) writer.add_file(entry.full_name, 0644) do |f| - f.write(result.to_xml) + f.write(hash_to_xml(result)) end if writer - - log "Result for #{entry.full_name} from archive #{file}\n#{result.to_xml(@options[:debug_fields])}\n" if @options[:debug] end end writer.close if writer end @@ -161,36 +161,32 @@ begin tarReader = Gem::Package::TarReader.new(Zlib::GzipReader.open(file)) tarReader.each do |entry| if entry.file? log "Processing file #{entry.full_name} from archive #{file}" - result = @engine.process(entry.full_name, entry.read, archive: file) + result = @transformation.call(entry.read.force_encoding("utf-8")) - xml_result = result.to_xml + xml_result = hash_to_xml(result) writer.add_file_simple(entry.full_name, 0644, xml_result.bytesize) do |f| f.write(xml_result) end if writer - - log "Result for #{entry.full_name} from archive #{file}\n#{result.to_xml(@options[:debug_fields])}\n" if @options[:debug] end end ensure writer.close if writer gzipWriter.close if gzipWriter && !gzipWriter.closed? end end def process_default_file(file) log "Processing file #{file}" - result = @engine.process(file, File.open(file, "r").read) + result = @transformation.call(File.open(file, "r").read.force_encoding("utf-8")) if output_dir out_file = File.join(output_dir, File.basename(file)) - File.open(out_file, 'w') { |f| f.write(result.to_xml) } + File.open(out_file, 'w') { |f| f.write(hash_to_xml(result)) } end - - log "Result for #{file}\n#{result.to_xml(@options[:debug_fields])}\n" if @options[:debug] end def output_dir if @options[:output_dir] dir = File.expand_path(@options[:output_dir]) @@ -208,8 +204,7 @@ else puts "#{message}" end end end - end end