lib/mabmapper/cli.rb in mabmapper-1.0.0.pre18 vs lib/mabmapper/cli.rb in mabmapper-2.0.0
- old
+ new
@@ -1,20 +1,21 @@
#
# The command line interface class
#
require 'mabmapper/elasticsearch_writer'
require 'mabmapper/tar_writer'
+require 'metacrunch/ubpb/transformations/mab_to_primo'
module Mabmapper
class Cli
ROOT_DIR = Dir.pwd
#
# Runs the complete CLI workflow from the constructor: starts with an empty
# options hash, calls parse_command_line!, prepares the converter and
# finally calls process_files!.
#
# In this diff the old load_engine! step is replaced by instantiating
# Metacrunch::UBPB::Transformations::MabToPrimo directly (its require line
# is added at the top of the file). The instance is kept in @transformation
# and later invoked with #call by the process_* methods.
#
def initialize
@options = {}
parse_command_line!
- load_engine!
+ @transformation = Metacrunch::UBPB::Transformations::MabToPrimo.new
process_files!
end
protected
@@ -33,15 +34,10 @@
@options[:debug] = false
opts.on( '-d', '--debug', "Debug mode on." ) do
@options[:debug] = true
end
- @options[:debug_fields] = []
- opts.on( '-f', '--debug-fields a,b,c', Array, "If debug mode is on only fields matching the given names will be debugged." ) do |fields|
- @options[:debug_fields] = fields
- end
-
@options[:silent] = false
opts.on( '-s', '--silent', "Do not output anything on the console" ) do
@options[:silent] = true
end
@@ -73,26 +69,10 @@
puts e.message
(puts optparse.help ; exit)
end
#
- # Load normalization engine
- #
# NOTE(review): every line of this method carries the "-" marker, i.e. the
# method exists only on the old side of this diff.
#
# Requires the hard-coded engine file, derives a class name from that path
# with String#classify, instantiates the class via #constantize and stores
# the instance in @engine. Only LoadError is rescued; in that case an error
# is logged and the process exits with status 1. Any other failure (for
# example while resolving the constant) is not handled here.
# classify/constantize are presumably the ActiveSupport inflections — TODO confirm.
- def load_engine!
- begin
- engine_file = "mabmapper/aleph_mab_xml_engine" # TODO: Make me configurable
- require engine_file
- engine_class_name = "#{engine_file}".classify
- @engine = engine_class_name.constantize.new
- log "#{engine_class_name} loaded!"
- rescue LoadError
- log "Error loading engine #{engine_file}."
- exit 1
- end
- end
-
- #
# Process the input files
#
def process_files!
max_processes = @options[:no_of_procs]
@@ -117,10 +97,32 @@
log "FINISHED"
end
private
#
# Serializes a result hash into an XML string using Nokogiri::XML::Builder.
# (Every line of this method carries the "+" marker: it is new in this diff.)
#
# A <document> root is created and each key/value pair of `hash` becomes
# child element(s) named after the downcased key:
#   * pairs whose value is not present? are skipped, except a literal
#     false, which is still written;
#   * an Array value produces one element per item (items are not filtered
#     individually);
#   * any other value produces a single element.
#
# Keys must respond to #downcase. The element name is sent with a trailing
# underscore; presumably this is Nokogiri's builder convention for avoiding
# clashes with existing Ruby methods, the underscore not being part of the
# resulting tag name — TODO confirm. present? is presumably ActiveSupport's
# Object#present? — TODO confirm.
#
# NOTE(review): the two commented-out lines look like remnants of a
# pluralized wrapper element around array values; they reference `field`,
# which is not defined in this method.
#
# Returns the value of builder.to_xml.
#
+ def hash_to_xml(hash)
+ builder = Nokogiri::XML::Builder.new do |xml|
+ xml.document do
+ hash.each_pair do |_key, _values|
+ if _values.present? || _values == false
+ if _values.is_a?(Array)
+ #xml.send("#{field.name.downcase.pluralize}_") do
+ _values.each do |_value|
+ xml.send("#{_key.downcase}_", _value)
+ end
+ #end
+ else
+ xml.send("#{_key.downcase}_", _values)
+ end
+ end
+ end
+ end
+ end
+
+ builder.to_xml
+ end
+
def process_file(file)
case
when file.end_with?('.tar') then process_tar_file(file)
when file.end_with?('.tar.gz') then process_tar_gz_file(file)
else process_default_file(file)
@@ -135,17 +137,15 @@
tarReader = Gem::Package::TarReader.new(File.open(file, 'r'))
tarReader.each do |entry|
if entry.file?
log "Processing file #{entry.full_name} from archive #{file}"
- result = @engine.process(entry.full_name, entry.read, archive: file)
+ result = @transformation.call(entry.read.force_encoding("utf-8"))
writer.add_file(entry.full_name, 0644) do |f|
- f.write(result.to_xml)
+ f.write(hash_to_xml(result))
end if writer
-
- log "Result for #{entry.full_name} from archive #{file}\n#{result.to_xml(@options[:debug_fields])}\n" if @options[:debug]
end
end
writer.close if writer
end
@@ -161,36 +161,32 @@
begin
tarReader = Gem::Package::TarReader.new(Zlib::GzipReader.open(file))
tarReader.each do |entry|
if entry.file?
log "Processing file #{entry.full_name} from archive #{file}"
- result = @engine.process(entry.full_name, entry.read, archive: file)
+ result = @transformation.call(entry.read.force_encoding("utf-8"))
- xml_result = result.to_xml
+ xml_result = hash_to_xml(result)
writer.add_file_simple(entry.full_name, 0644, xml_result.bytesize) do |f|
f.write(xml_result)
end if writer
-
- log "Result for #{entry.full_name} from archive #{file}\n#{result.to_xml(@options[:debug_fields])}\n" if @options[:debug]
end
end
ensure
writer.close if writer
gzipWriter.close if gzipWriter && !gzipWriter.closed?
end
end
#
# Converts a single input file that is neither a .tar nor a .tar.gz archive.
#
# The file is read completely into memory. On the new side of the diff its
# content is tagged as UTF-8 with force_encoding (the bytes are not
# transcoded) and passed to @transformation.call; the result is serialized
# with hash_to_xml. On the old side @engine.process received the file name
# and the content, and the result was serialized with result.to_xml.
#
# When output_dir is truthy the XML is written to a file with the same base
# name inside that directory; otherwise nothing is written.
#
# The debug log of the result (old side) is removed in this diff.
#
# NOTE(review): File.open(file, "r").read never closes the handle
# explicitly; File.read(file) would read the content and close it.
#
def process_default_file(file)
log "Processing file #{file}"
- result = @engine.process(file, File.open(file, "r").read)
+ result = @transformation.call(File.open(file, "r").read.force_encoding("utf-8"))
if output_dir
out_file = File.join(output_dir, File.basename(file))
- File.open(out_file, 'w') { |f| f.write(result.to_xml) }
+ File.open(out_file, 'w') { |f| f.write(hash_to_xml(result)) }
end
-
- log "Result for #{file}\n#{result.to_xml(@options[:debug_fields])}\n" if @options[:debug]
end
def output_dir
if @options[:output_dir]
dir = File.expand_path(@options[:output_dir])
@@ -208,8 +204,7 @@
else
puts "#{message}"
end
end
end
-
end
end