lib/dwc-archive/classification_normalizer.rb in dwc-archive-0.4.3 vs lib/dwc-archive/classification_normalizer.rb in dwc-archive-0.4.4
- old
+ new
@@ -16,19 +16,22 @@
# Struct-backed record with three members: name, canonical_name and status.
# Unchanged context line in this diff.
class SynonymNormalized < Struct.new(:name, :canonical_name, :status);end
# Struct-backed record with two members: name and language.
# Unchanged context line in this diff.
class VernacularNormalized < Struct.new(:name, :language);end
class ClassificationNormalizer
# Both accessors are additions in 0.4.4 ("+" lines).
# verbose: read/write access to @verbose, which initialize sets from its
# `verbose` argument.
+ attr_accessor :verbose
# error_names: read-only access to @error_names, the array that
# get_classification_path appends {:name => taxon, :error => message} hashes to.
+ attr_reader :error_names
# Sets up the normalizer state for one archive.
#
# dwc_instance - object whose #core and #extensions are read here.
# verbose      - stored as-is in @verbose (default: false).
#
# Lines prefixed with "-"/"+" are diff markers: 0.4.4 changes @verbose_count
# from 1000 to 10000 and introduces @error_names.
def initialize(dwc_instance, verbose = false)
@dwc = dwc_instance
# get_fields is defined outside the visible hunks.
@core = get_fields(@dwc.core)
# One [extension, fields] pair per extension of the archive.
@extensions = @dwc.extensions.map { |e| [e, get_fields(e)] }
# Starts empty; the classification-path methods iterate it as
# taxon_id => taxon and look parents up by taxon.parent_id.
@res = {}
@parser = ParsleyStore.new(1,2)
@verbose = verbose
# NOTE(review): presumably the interval used for verbose progress output;
# its use is not visible in this diff -- confirm.
- @verbose_count = 1000
+ @verbose_count = 10000
# Starts empty; get_classification_path pushes an entry here before raising
# DarwinCore::ParentNotCurrentError.
+ @error_names = []
end
def normalize
injest_core
calculate_classification_path
@@ -99,20 +102,30 @@
end
# Walks every taxon_id/taxon pair in @res and asks get_classification_path to
# fill in classification_path for each taxon that does not have one yet.
#
# Diff note: 0.4.3 (the "-" line) called get_classification_path unguarded;
# 0.4.4 (the "+" lines) rescues DarwinCore::ParentNotCurrentError so that one
# failing taxon no longer aborts the whole loop.
def calculate_classification_path
@res.each do |taxon_id, taxon|
# Already resolved (get_classification_path also sets parents' paths).
next if taxon.classification_path
- get_classification_path(taxon)
+ begin
+ get_classification_path(taxon)
+ rescue DarwinCore::ParentNotCurrentError
# The failure was already recorded in @error_names by
# get_classification_path before it raised; just move on to the next taxon.
+ next
+ end
end
end
def get_classification_path(taxon)
return if taxon.classification_path
if DarwinCore.nil_field?(taxon.parent_id)
taxon.classification_path = [taxon.current_name_canonical]
else
- parent_cp = @res[taxon.parent_id].classification_path
+ begin
+ parent_cp = @res[taxon.parent_id].classification_path
+ rescue NoMethodError #name has a parent which is not a current name
+ error = "The parent of the taxon \'#{taxon.current_name}\' is deprecated"
+ @error_names << {:name => taxon, :error => error}
+ raise DarwinCore::ParentNotCurrentError, error
+ end
if parent_cp
taxon.classification_path = parent_cp + [taxon.current_name_canonical]
else
get_classification_path(@res[taxon.parent_id])
taxon.classification_path = @res[taxon.parent_id].classification_path + [taxon.current_name_canonical]
@@ -153,6 +166,7 @@
end
end
end
end
+