lib/dwc-archive/classification_normalizer.rb in dwc-archive-0.7.8 vs lib/dwc-archive/classification_normalizer.rb in dwc-archive-0.7.9
- old
+ new
@@ -1,10 +1,10 @@
# encoding: utf-8
require 'parsley-store'
-class DarwinCore
-
+class DarwinCore
+
class TaxonNormalized
attr_accessor :id, :parent_id, :classification_path_id, :classification_path, :current_name, :current_name_canonical, :synonyms, :vernacular_names, :rank, :status
def initialize
@id = @parent_id = @rank = @status = nil
@@ -17,11 +17,11 @@
end
end
# Plain record describing one synonym. Positional members, in order:
# id, name, canonical_name, status. Members not supplied default to nil.
class SynonymNormalized < Struct.new(:id, :name, :canonical_name, :status)
end
- class VernacularNormalized < Struct.new(:name, :language, :locality);end
+ class VernacularNormalized < Struct.new(:name, :language, :locality, :country_code);end
class ClassificationNormalizer
attr_reader :error_names, :tree, :normalized_data
def initialize(dwc_instance)
@@ -80,28 +80,28 @@
canonical_name.to_s.empty? ? a_scientific_name : canonical_name
else
nil
end
end
-
+
def get_fields(element)
data = element.fields.inject({}) { |res, f| res[f[:term].split('/')[-1].downcase.to_sym] = f[:index].to_i; res }
- data[:id] = element.id[:index]
+ data[:id] = element.id[:index]
data
end
# Tells whether +status+ contains a line that begins with "syn".
#
# A falsy +status+ (nil or false) is handed back unchanged, so a missing
# status yields nil rather than false. Any other value is matched against
# /^syn/ and the outcome is reduced to a strict true or false.
def status_synonym?(status)
  return status unless status

  status.match(/^syn/) ? true : false
end
-
+
def add_synonym_from_core(taxon_id, row)
@synonyms[row[@core_fields[:id]]] = taxon_id
taxon = @normalized_data[row[taxon_id]] ? @normalized_data[row[taxon_id]] : @normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new
synonym = SynonymNormalized.new(
row[@core_fields[:id]],
- row[@core_fields[:scientificname]],
- row[@core_fields[:canonicalname]],
+ row[@core_fields[:scientificname]],
+ row[@core_fields[:canonicalname]],
@core_fields[:taxonomicstatus] ? row[@core_fields[:taxonomicstatus]] : nil)
taxon.synonyms << synonym
add_name_string(synonym.name)
add_name_string(synonym.canonical_name)
end
@@ -146,20 +146,20 @@
else
taxon = @normalized_data[r[@core_fields[:id]]] ? @normalized_data[r[@core_fields[:id]]] : @normalized_data[r[@core_fields[:id]]] = DarwinCore::TaxonNormalized.new
taxon.id = r[@core_fields[:id]]
taxon.current_name = r[@core_fields[:scientificname]]
taxon.current_name_canonical = r[@core_fields[:canonicalname]]
- taxon.parent_id = has_parent_id? ? r[parent_id] : nil
+ taxon.parent_id = has_parent_id? ? r[parent_id] : nil
taxon.rank = r[@core_fields[:taxonrank]] if @core_fields[:taxonrank]
taxon.status = r[@core_fields[:taxonomicstatus]] if @core_fields[:taxonomicstatus]
add_name_string(taxon.current_name)
add_name_string(taxon.current_name_canonical) if taxon.current_name_canonical && !taxon.current_name_canonical.empty?
end
end
end
end
-
+
# True when @core_fields has a :highertaxonid or a :parentnameusageid key.
#
# The answer is cached in @has_parent_id, but only a truthy answer sticks:
# while the cached value is nil or false the lookup is repeated on each call.
def has_parent_id?
  return @has_parent_id if @has_parent_id

  @has_parent_id = [:highertaxonid, :parentnameusageid].any? { |key| @core_fields.has_key?(key) }
end
def parent_id
@@ -186,11 +186,11 @@
@tree.merge!(current_node)
else
parent_cp = parent_cpid = nil
if @normalized_data[taxon.parent_id]
parent_cp = @normalized_data[taxon.parent_id].classification_path if @with_canonical_names
- parent_cpid = @normalized_data[taxon.parent_id].classification_path_id
+ parent_cpid = @normalized_data[taxon.parent_id].classification_path_id
else
current_parent = @normalized_data[@synonyms[taxon.parent_id]]
if current_parent
error = "WARNING: The parent of the taxon \'#{taxon.current_name}\' is deprecated"
@error_names << {:data => taxon, :error => :deprecated_parent, :current_parent => current_parent }
@@ -198,15 +198,15 @@
parent_cp = current_parent.classification_path if @with_canonical_names
parent_cpid = current_parent.classification_path_id
else
error = "WARNING: The parent of the taxon \'#{taxon.current_name}\' not found"
@error_names << {:data => taxon, :error => :deprecated_parent, :current_parent => nil}
- end
+ end
end
return 'error' unless parent_cpid
if parent_cpid.empty?
- res = get_classification_path(@normalized_data[taxon.parent_id])
+ res = get_classification_path(@normalized_data[taxon.parent_id])
return res if res == 'error'
if @with_canonical_names
taxon.classification_path += @normalized_data[taxon.parent_id].classification_path + [taxon.current_name_canonical]
end
taxon.classification_path_id += @normalized_data[taxon.parent_id].classification_path_id + [taxon.id]
@@ -231,21 +231,21 @@
ext, fields = *e
ingest_synonyms(e) if fields.keys.include? :scientificname
ingest_vernaculars(e) if fields.keys.include? :vernacularname
end
end
-
+
def ingest_synonyms(extension)
DarwinCore.logger_write(@dwc.object_id, "Ingesting synonyms extension")
ext, fields = *extension
ext.read do |rows|
rows[0].each do |r|
set_scientific_name(r, fields)
synonym = SynonymNormalized.new(
nil,
- r[fields[:scientificname]],
- r[fields[:canonicalname]],
+ r[fields[:scientificname]],
+ r[fields[:canonicalname]],
fields[:taxonomicstatus] ? r[fields[:taxonomicstatus]] : nil)
@normalized_data[r[fields[:id]]].synonyms << synonym
add_name_string(synonym.name)
add_name_string(synonym.canonical_name)
end
@@ -260,18 +260,21 @@
language = nil
if fields[:language]
language = r[fields[:language]]
elsif fields[:languagecode]
- language = r[fields[:languagecode]]
+ language = r[fields[:languagecode]]
end
-
+
locality = fields[:locality] ? r[fields[:locality]] : nil
-
+
+ country_code = fields[:countrycode] ? r[fields[:countrycode]] : nil
+
vernacular = VernacularNormalized.new(
r[fields[:vernacularname]],
language,
- locality)
+ locality,
+ country_code)
@normalized_data[r[fields[:id]]].vernacular_names << vernacular
add_vernacular_name_string(vernacular.name)
end
end
end