lib/oddb2xml/compare.rb in oddb2xml-2.7.1 vs lib/oddb2xml/compare.rb in oddb2xml-2.7.2

- old
+ new

@@ -1,106 +1,113 @@ -# encoding: utf-8 -require 'xmlsimple' +require "xmlsimple" module Oddb2xml def self.log_timestamp(msg) full_msg = "#{Time.now.strftime("%H:%M:%S")}: #{msg}" puts full_msg - STDOUT.flush + $stdout.flush full_msg end + class StammXML V3_NAME_REG = /_([N,P])_/ attr_accessor :components attr_reader :keys, :sub_key_names, :filename, :basename, :version, :hash - def initialize(filename, components = ['ITEMS']) + def initialize(filename, components = ["ITEMS"]) raise "File #{filename} must exist" unless File.exist?(filename) @filename = filename @basename = File.basename(filename) - @version = V3_NAME_REG.match(filename) ? 3 : 5 + @version = V3_NAME_REG.match(filename) ? 3 : 5 @components = components if @version == 5 @hash = load_file(@filename) else raise "Unsupported version #{@version}" end end + def self.get_component_key_name(component_name) - return 'LIMNAMEBAG' if /LIMITATION/i.match(component_name) - return 'PRODNO' if /PRODUCT/i.match(component_name) - return 'GTIN' if /ITEM/i.match(component_name) - raise "Cannot determine keyname for component #{component_name}" + return "LIMNAMEBAG" if /LIMITATION/i.match?(component_name) + return "PRODNO" if /PRODUCT/i.match?(component_name) + return "GTIN" if /ITEM/i.match?(component_name) + raise "Cannot determine keyname for component #{component_name}" end + def get_limitation_from_v5(item) - get_item('PRODUCTS', item['PRODNO'].first.to_i)['LIMNAMEBAG'] ? ['true'] : nil + get_item("PRODUCTS", item["PRODNO"].first.to_i)["LIMNAMEBAG"] ? ["true"] : nil end + def get_field_from_v5_product(item, field_name) - get_item('PRODUCTS', item['PRODNO'].first.to_i)[field_name] + get_item("PRODUCTS", item["PRODNO"].first.to_i)[field_name] end + def get_items(component_name) if @version == 3 - items = @hash[component_name] + @hash[component_name] else - items = @hash[component_name].first.values.first + @hash[component_name].first.values.first end - items end + def get_item(component_name, id) keyname = StammXML.get_component_key_name(component_name) - get_items(component_name).find{|item| item[keyname].first.to_i == id} + get_items(component_name).find { |item| item[keyname].first.to_i == id } end + def load_file(name) - Oddb2xml.log_timestamp "Reading #{name} #{(File.size(name)/1024/1024).to_i} MB. This may take some time" + Oddb2xml.log_timestamp "Reading #{name} #{(File.size(name) / 1024 / 1024).to_i} MB. This may take some time" XmlSimple.xml_in(IO.read(name)) end end + class CompareV5 DEFAULTS = { - :components => ["PRODUCTS", "LIMITATIONS", "ITEMS",], - :fields_to_ignore => ['COMP', 'DOSAGE_FORMF', 'MEASUREF'], - :fields_as_floats => [ 'PEXT', 'PEXF', 'PPUB' ], - :min_diff_for_floats => 0.01, + components: ["PRODUCTS", "LIMITATIONS", "ITEMS"], + fields_to_ignore: ["COMP", "DOSAGE_FORMF", "MEASUREF"], + fields_as_floats: ["PEXT", "PEXF", "PPUB"], + min_diff_for_floats: 0.01 } def initialize(left, right, options = DEFAULTS.clone) @options = options @left = StammXML.new(left, @options[:components]) @right = StammXML.new(right, @options[:components]) @diff_stat = {} @occurrences = {} @report = [] end - def get_keys(items, key='GTIN') - items.collect{|item| item[key].first.to_i } + + def get_keys(items, key = "GTIN") + items.collect { |item| item[key].first.to_i } end + def get_names(items) - items.collect{|item| item.keys}.flatten.uniq.sort + items.collect { |item| item.keys }.flatten.uniq.sort end + def compare show_header("Start comparing #{@left.filename} with #{@right.filename}") (@left.components & @right.components).each do |name| - begin - puts "\n#{Time.now.strftime("%H:%M:%S")}: Comparing #{name} in #{@left.basename} with #{@right.basename}" - key = StammXML.get_component_key_name(name) - left_items = @left.get_items(name) - next unless left_items - right_items = @right.get_items(name) - next unless right_items - @diff_stat[name] = {} - @occurrences[name] = {} - @diff_stat[name][NR_COMPARED] = 0 - l_names = get_names(left_items) - r_names = get_names(right_items) - compare_names = l_names & r_names - l_keys = get_keys(left_items, key) - r_keys = get_keys(right_items, key) - (l_keys & r_keys).each do |id| - compare_details(name, compare_names, id) - end - key_results_details(name, compare_names, l_keys, r_keys) - rescue => error - puts "Execution failed with #{error}" + puts "\n#{Time.now.strftime("%H:%M:%S")}: Comparing #{name} in #{@left.basename} with #{@right.basename}" + key = StammXML.get_component_key_name(name) + left_items = @left.get_items(name) + next unless left_items + right_items = @right.get_items(name) + next unless right_items + @diff_stat[name] = {} + @occurrences[name] = {} + @diff_stat[name][NR_COMPARED] = 0 + l_names = get_names(left_items) + r_names = get_names(right_items) + compare_names = l_names & r_names + l_keys = get_keys(left_items, key) + r_keys = get_keys(right_items, key) + (l_keys & r_keys).each do |id| + compare_details(name, compare_names, id) end + key_results_details(name, compare_names, l_keys, r_keys) + rescue => error + puts "Execution failed with #{error}" end show_header("Summary comparing #{@left.filename} with #{@right.filename}") puts "Ignored differences in #{@options[:fields_to_ignore]}. Signaled when differences in #{@options[:fields_as_floats]} were bigger than #{@options[:min_diff_for_floats]}" puts @report.join("\n") @diff_stat.each do |component, stats| @@ -114,32 +121,35 @@ @diff_stat rescue => error puts "Execution failed with #{error}" raise error end + private - NR_COMPARED = 'NR_COMPARED' - COUNT = '_count' + + NR_COMPARED = "NR_COMPARED" + COUNT = "_count" def show_header(header) text = Oddb2xml.log_timestamp(header) pad = 5 puts - puts '-'*(text.length+2*pad) - puts ''.ljust(pad) + text - puts '-'*(text.length+2*pad) + puts "-" * (text.length + 2 * pad) + puts "".ljust(pad) + text + puts "-" * (text.length + 2 * pad) puts end + def compare_details(component_name, compare_names, id) l_item = @left.get_item(component_name, id) r_item = @right.get_item(component_name, id) found_one = false length = 32 found = false - detail_name = l_item['DSCR'] ? l_item['DSCR'].first[0..length-1].rjust(length) : ''.rjust(length) + detail_name = l_item["DSCR"] ? l_item["DSCR"].first[0..length - 1].rjust(length) : "".rjust(length) details = "Diff in #{id.to_s.ljust(15)} #{detail_name}" diff_name = component_name - diff_name += 'S' unless /S$/.match(diff_name) + diff_name += "S" unless /S$/.match?(diff_name) @diff_stat[diff_name] ||= {} @occurrences[diff_name] ||= {} @diff_stat[diff_name][NR_COMPARED] ||= 0 @diff_stat[diff_name][NR_COMPARED] += 1 l_item.keys.each do |sub_key| @@ -152,36 +162,37 @@ if @options[:fields_as_floats].index(sub_key) l_float = l_value ? l_value.first.to_f : 0.0 r_float = r_value ? r_value.first.to_f : 0.0 next if (l_float - r_float).abs < @options[:min_diff_for_floats] end - next if (r_value.is_a?(Array) && '--missing--'.eql?(r_value.first)) || (l_value.is_a?(Array) && '--missing--'.eql?(l_value.first)) - # TODO: get_field_from_v5_product + next if (r_value.is_a?(Array) && "--missing--".eql?(r_value.first)) || (l_value.is_a?(Array) && "--missing--".eql?(l_value.first)) + # TODO: get_field_from_v5_product next if r_value.to_s.eql?(l_value.to_s) next if r_value.to_s.upcase.eql?(l_value.to_s.upcase) && @options[:case_insensitive] details += " #{sub_key}: '#{l_value}' != '#{r_value}'" found = found_one = true @diff_stat[diff_name][sub_key] += 1 end - puts details.gsub(/[\[\]]/,'') if found + puts details.gsub(/[\[\]]/, "") if found end def show_keys(keys, batch_size = 20) 0.upto(keys.size) do |idx| next unless idx % batch_size == 0 - puts ' ' + keys[idx..(idx + batch_size-1)].join(' ') + puts " " + keys[idx..(idx + batch_size - 1)].join(" ") end end + def key_results_details(component_name, compare_names, l_keys, r_keys) - component_name += 'S' unless /S$/.match(component_name) - @report << "#{component_name}: Found #{l_keys.size} items only in #{@left.basename} #{r_keys.size} items only in #{@right.basename}, compared #{@diff_stat[component_name][NR_COMPARED]} items" + component_name += "S" unless /S$/.match?(component_name) + @report << "#{component_name}: Found #{l_keys.size} items only in #{@left.basename} #{r_keys.size} items only in #{@right.basename}, compared #{@diff_stat[component_name][NR_COMPARED]} items" keys = r_keys - l_keys - head = "#{component_name}: #{(keys).size} keys only in #{@right.basename}" + head = "#{component_name}: #{keys.size} keys only in #{@right.basename}" puts "#{head}: Keys were #{keys.size}" show_keys(keys) @report << head keys = l_keys - r_keys - head = "#{component_name}: #{(keys).size} keys only in #{@left.basename}" + head = "#{component_name}: #{keys.size} keys only in #{@left.basename}" puts "#{head}: Keys were #{keys.size}" show_keys(keys) @report << head end end