lib/dwc-archive/classification_normalizer.rb in dwc-archive-0.5.2 vs lib/dwc-archive/classification_normalizer.rb in dwc-archive-0.5.3
- old
+ new
@@ -20,17 +20,17 @@
# Record describing one synonym of a taxon.
#
# Members, in positional order:
#   name           - the synonym's scientific name string
#   canonical_name - the canonical form of that name
#   status         - taxonomic status value, or nil when none was supplied
class SynonymNormalized < Struct.new(:name, :canonical_name, :status)
end
# Record describing one vernacular (common) name of a taxon.
#
# Members, in positional order:
#   name     - the vernacular name string
#   language - language code value, or nil when none was supplied
class VernacularNormalized < Struct.new(:name, :language)
end
class ClassificationNormalizer
# Read-only accessors. The `+` line additionally exposes :normalized_data, the
# hash that #initialize creates and that #normalize rebuilds and returns.
- attr_reader :error_names, :tree
+ attr_reader :error_names, :tree, :normalized_data
# Prepares an empty normalizer for one archive.
#
# dwc_instance - object responding to #core and #extensions
#                (presumably a DarwinCore archive -- confirm against callers).
def initialize(dwc_instance)
@dwc = dwc_instance
# Field lookup for the core file, as returned by get_fields.
@core_fields = get_fields(@dwc.core)
# Every extension is paired with its own field lookup: [extension, fields].
@extensions = @dwc.extensions.map { |e| [e, get_fields(e)] }
# Result store; the `+` line renames it so it matches the public reader.
- @res = {}
+ @normalized_data = {}
# NOTE(review): the meaning of the (1,2) arguments is not visible here -- confirm against ParsleyStore.
@parser = ParsleyStore.new(1,2)
@name_strings = {}
# Filled with {:name => taxon, :error => message} hashes when a parent cannot be resolved.
@error_names = []
@tree = {}
end
@@ -43,28 +43,28 @@
@name_strings.keys
end
# Runs the whole normalization in three steps: ingest the core file, compute
# the classification paths, then ingest the extensions. Each step is announced
# through DarwinCore.logger_write, tagged with the archive's object_id.
#
# Returns the result hash, which is also kept in the instance variable reset below.
def normalize
DarwinCore.logger_write(@dwc.object_id, "Started normalization of the classification")
# Start from an empty store so every call rebuilds the result from scratch.
- @res = {}
+ @normalized_data = {}
ingest_core
DarwinCore.logger_write(@dwc.object_id, "Calculating the classification parent/child paths")
calculate_classification_path
DarwinCore.logger_write(@dwc.object_id, "Ingesting data from extensions")
ingest_extensions
- @res
+ @normalized_data
end
private
# Returns the canonical form of a scientific name, falling back to the name
# itself when the parser produces nothing (nil or an empty string).
#
# a_scientific_name - name string; when R19 is set it is re-tagged as UTF-8
#                     in place (R19 presumably flags Ruby 1.9 -- confirm).
#
# Side effect: the original name, and the canonical form when it is not blank,
# are registered through add_name_string.
def get_canonical_name(a_scientific_name)
if R19
a_scientific_name.force_encoding('utf-8')
end
canonical_name = @parser.parse(a_scientific_name, :canonical_only => true)
add_name_string(a_scientific_name)
- add_name_string(canonical_name) unless canonical_name.empty?
+ add_name_string(canonical_name) unless canonical_name.to_s.empty?
# Same nil-safe blank check as the guard above: a nil parse result must fall
# back to the original name instead of raising NoMethodError on nil.empty?.
canonical_name.to_s.empty? ? a_scientific_name : canonical_name
end
def get_fields(element)
data = element.fields.inject({}) { |res, f| res[f[:term].split('/')[-1].downcase.to_sym] = f[:index].to_i; res }
@@ -75,11 +75,11 @@
# Tells whether a taxonomic status value marks a synonym, i.e. contains a line
# that begins with "syn" (case-sensitive).
#
# status - status string, or nil/false when the row carries none.
#
# Returns true or false for a string; a nil/false status is handed back as is.
def status_synonym?(status)
  return status unless status

  status.match(/^syn/) ? true : false
end
# Records the name held in `row` as a synonym of the taxon whose id is row[taxon_id].
#
# taxon_id - column index into `row` that holds the id of the taxon receiving the synonym
# row      - one core data row, indexable by column index
#
# When no entry exists yet for that id, a blank DarwinCore::TaxonNormalized is
# created and stored under it before the synonym is appended.
def add_synonym_from_core(taxon_id, row)
- taxon = @res[row[taxon_id]] ? @res[row[taxon_id]] : @res[row[taxon_id]] = DarwinCore::TaxonNormalized.new
+ taxon = @normalized_data[row[taxon_id]] ? @normalized_data[row[taxon_id]] : @normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new
# The status member is nil when the core has no :taxonomicstatus field.
taxon.synonyms << SynonymNormalized.new(
row[@core_fields[:scientificname]],
row[@core_fields[:canonicalname]],
@core_fields[:taxonomicstatus] ? row[@core_fields[:taxonomicstatus]] : nil)
end
@@ -101,11 +101,11 @@
if @core_fields[:acceptednameusageid] && r[@core_fields[:acceptednameusageid]] && r[@core_fields[:acceptednameusageid]] != r[@core_fields[:id]]
add_synonym_from_core(@core_fields[:acceptednameusageid], r)
elsif !@core_fields[:acceptednameusageid] && status_synonym?(r[@core_fields[:taxonomicstatus]])
add_synonym_from_core(parent_id, r)
else
- taxon = @res[r[@core_fields[:id]]] ? @res[r[@core_fields[:id]]] : @res[r[@core_fields[:id]]] = DarwinCore::TaxonNormalized.new
+ taxon = @normalized_data[r[@core_fields[:id]]] ? @normalized_data[r[@core_fields[:id]]] : @normalized_data[r[@core_fields[:id]]] = DarwinCore::TaxonNormalized.new
taxon.id = r[@core_fields[:id]]
taxon.current_name = r[@core_fields[:scientificname]]
taxon.current_name_canonical = r[@core_fields[:canonicalname]]
taxon.parent_id = r[parent_id]
taxon.rank = r[@core_fields[:taxonrank]] if @core_fields[:taxonrank]
@@ -118,11 +118,11 @@
# Looks up which core field carries the reference to a taxon's parent.
#
# Returns the value stored under :highertaxonid in @core_fields when present,
# otherwise the one under :parentnameusageid; nil when the core has neither.
# Callers use the result to index into a core row.
def parent_id
  @core_fields[:highertaxonid] || @core_fields[:parentnameusageid]
end
def calculate_classification_path
- @res.each do |taxon_id, taxon|
+ @normalized_data.each do |taxon_id, taxon|
next if !taxon.classification_path.empty?
begin
get_classification_path(taxon)
rescue DarwinCore::ParentNotCurrentError
next
@@ -137,26 +137,26 @@
taxon.classification_path << taxon.current_name_canonical
taxon.classification_path_id << taxon.id
@tree.merge!(current_node)
else
begin
- parent_cp = @res[taxon.parent_id].classification_path
+ parent_cp = @normalized_data[taxon.parent_id].classification_path
rescue NoMethodError #name has a parent which is not a current name
error = "The parent of the taxon \'#{taxon.current_name}\' is deprecated"
@error_names << {:name => taxon, :error => error}
raise DarwinCore::ParentNotCurrentError, error
end
if parent_cp.empty?
- get_classification_path(@res[taxon.parent_id])
- taxon.classification_path += @res[taxon.parent_id].classification_path + [taxon.current_name_canonical]
- taxon.classification_path_id += @res[taxon.parent_id].classification_path_id + [taxon.id]
- parent_node = @res[taxon.parent_id].classification_path_id.inject(@tree) {|node, id| node[id]}
+ get_classification_path(@normalized_data[taxon.parent_id])
+ taxon.classification_path += @normalized_data[taxon.parent_id].classification_path + [taxon.current_name_canonical]
+ taxon.classification_path_id += @normalized_data[taxon.parent_id].classification_path_id + [taxon.id]
+ parent_node = @normalized_data[taxon.parent_id].classification_path_id.inject(@tree) {|node, id| node[id]}
parent_node.merge!(current_node)
else
taxon.classification_path += parent_cp + [taxon.current_name_canonical]
- taxon.classification_path_id += @res[taxon.parent_id].classification_path_id + [taxon.id]
- parent_node = @res[taxon.parent_id].classification_path_id.inject(@tree) {|node, id| node[id]}
+ taxon.classification_path_id += @normalized_data[taxon.parent_id].classification_path_id + [taxon.id]
+ parent_node = @normalized_data[taxon.parent_id].classification_path_id.inject(@tree) {|node, id| node[id]}
parent_node.merge!(current_node)
end
end
end
@@ -172,11 +172,11 @@
DarwinCore.logger_write(@dwc.object_id, "Ingesting synonyms extension")
ext, fields = *extension
ext.read do |rows|
rows[0].each do |r|
set_scientific_name(r, fields)
- @res[r[fields[:id]]].synonyms << SynonymNormalized.new(
+ @normalized_data[r[fields[:id]]].synonyms << SynonymNormalized.new(
r[fields[:scientificname]],
r[fields[:canonicalname]],
fields[:taxonomicstatus] ? r[fields[:taxonomicstatus]] : nil)
end
end
@@ -185,10 +185,10 @@
def ingest_vernaculars(extension)
DarwinCore.logger_write(@dwc.object_id, "Ingesting vernacular names extension")
ext, fields = *extension
ext.read do |rows|
rows[0].each do |r|
- @res[r[fields[:id]]].vernacular_names << VernacularNormalized.new(
+ @normalized_data[r[fields[:id]]].vernacular_names << VernacularNormalized.new(
r[fields[:vernacularname]],
fields[:languagecode] ? r[fields[:languagecode]] : nil)
add_name_string(r[fields[:vernacularname]])
end
end