lib/dwc-archive/classification_normalizer.rb in dwc-archive-0.4.3 vs lib/dwc-archive/classification_normalizer.rb in dwc-archive-0.4.4

- old
+ new

@@ -16,19 +16,22 @@ class SynonymNormalized < Struct.new(:name, :canonical_name, :status);end class VernacularNormalized < Struct.new(:name, :language);end class ClassificationNormalizer + attr_accessor :verbose + attr_reader :error_names def initialize(dwc_instance, verbose = false) @dwc = dwc_instance @core = get_fields(@dwc.core) @extensions = @dwc.extensions.map { |e| [e, get_fields(e)] } @res = {} @parser = ParsleyStore.new(1,2) @verbose = verbose - @verbose_count = 1000 + @verbose_count = 10000 + @error_names = [] end def normalize injest_core calculate_classification_path @@ -99,20 +102,30 @@ end def calculate_classification_path @res.each do |taxon_id, taxon| next if taxon.classification_path - get_classification_path(taxon) + begin + get_classification_path(taxon) + rescue DarwinCore::ParentNotCurrentError + next + end end end def get_classification_path(taxon) return if taxon.classification_path if DarwinCore.nil_field?(taxon.parent_id) taxon.classification_path = [taxon.current_name_canonical] else - parent_cp = @res[taxon.parent_id].classification_path + begin + parent_cp = @res[taxon.parent_id].classification_path + rescue NoMethodError #name has a parent which is not a current name + error = "The parent of the taxon \'#{taxon.current_name}\' is deprecated" + @error_names << {:name => taxon, :error => error} + raise DarwinCore::ParentNotCurrentError, error + end if parent_cp taxon.classification_path = parent_cp + [taxon.current_name_canonical] else get_classification_path(@res[taxon.parent_id]) taxon.classification_path = @res[taxon.parent_id].classification_path + [taxon.current_name_canonical] @@ -153,6 +166,7 @@ end end end end +