lib/dwc-archive/classification_normalizer.rb in dwc-archive-0.7.8 vs lib/dwc-archive/classification_normalizer.rb in dwc-archive-0.7.9

- old
+ new

@@ -1,10 +1,10 @@ # encoding: utf-8 require 'parsley-store' -class DarwinCore - +class DarwinCore + class TaxonNormalized attr_accessor :id, :parent_id, :classification_path_id, :classification_path, :current_name, :current_name_canonical, :synonyms, :vernacular_names, :rank, :status def initialize @id = @parent_id = @rank = @status = nil @@ -17,11 +17,11 @@ end end class SynonymNormalized < Struct.new(:id, :name, :canonical_name, :status);end - class VernacularNormalized < Struct.new(:name, :language, :locality);end + class VernacularNormalized < Struct.new(:name, :language, :locality, :country_code);end class ClassificationNormalizer attr_reader :error_names, :tree, :normalized_data def initialize(dwc_instance) @@ -80,28 +80,28 @@ canonical_name.to_s.empty? ? a_scientific_name : canonical_name else nil end end - + def get_fields(element) data = element.fields.inject({}) { |res, f| res[f[:term].split('/')[-1].downcase.to_sym] = f[:index].to_i; res } - data[:id] = element.id[:index] + data[:id] = element.id[:index] data end def status_synonym?(status) status && !!status.match(/^syn/) end - + def add_synonym_from_core(taxon_id, row) @synonyms[row[@core_fields[:id]]] = taxon_id taxon = @normalized_data[row[taxon_id]] ? @normalized_data[row[taxon_id]] : @normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new synonym = SynonymNormalized.new( row[@core_fields[:id]], - row[@core_fields[:scientificname]], - row[@core_fields[:canonicalname]], + row[@core_fields[:scientificname]], + row[@core_fields[:canonicalname]], @core_fields[:taxonomicstatus] ? row[@core_fields[:taxonomicstatus]] : nil) taxon.synonyms << synonym add_name_string(synonym.name) add_name_string(synonym.canonical_name) end @@ -146,20 +146,20 @@ else taxon = @normalized_data[r[@core_fields[:id]]] ? @normalized_data[r[@core_fields[:id]]] : @normalized_data[r[@core_fields[:id]]] = DarwinCore::TaxonNormalized.new taxon.id = r[@core_fields[:id]] taxon.current_name = r[@core_fields[:scientificname]] taxon.current_name_canonical = r[@core_fields[:canonicalname]] - taxon.parent_id = has_parent_id? ? r[parent_id] : nil + taxon.parent_id = has_parent_id? ? r[parent_id] : nil taxon.rank = r[@core_fields[:taxonrank]] if @core_fields[:taxonrank] taxon.status = r[@core_fields[:taxonomicstatus]] if @core_fields[:taxonomicstatus] add_name_string(taxon.current_name) add_name_string(taxon.current_name_canonical) if taxon.current_name_canonical && !taxon.current_name_canonical.empty? end end end end - + def has_parent_id? @has_parent_id ||= @core_fields.has_key?(:highertaxonid) || @core_fields.has_key?(:parentnameusageid) end def parent_id @@ -186,11 +186,11 @@ @tree.merge!(current_node) else parent_cp = parent_cpid = nil if @normalized_data[taxon.parent_id] parent_cp = @normalized_data[taxon.parent_id].classification_path if @with_canonical_names - parent_cpid = @normalized_data[taxon.parent_id].classification_path_id + parent_cpid = @normalized_data[taxon.parent_id].classification_path_id else current_parent = @normalized_data[@synonyms[taxon.parent_id]] if current_parent error = "WARNING: The parent of the taxon \'#{taxon.current_name}\' is deprecated" @error_names << {:data => taxon, :error => :deprecated_parent, :current_parent => current_parent } @@ -198,15 +198,15 @@ parent_cp = current_parent.classification_path if @with_canonical_names parent_cpid = current_parent.classification_path_id else error = "WARNING: The parent of the taxon \'#{taxon.current_name}\' not found" @error_names << {:data => taxon, :error => :deprecated_parent, :current_parent => nil} - end + end end return 'error' unless parent_cpid if parent_cpid.empty? - res = get_classification_path(@normalized_data[taxon.parent_id]) + res = get_classification_path(@normalized_data[taxon.parent_id]) return res if res == 'error' if @with_canonical_names taxon.classification_path += @normalized_data[taxon.parent_id].classification_path + [taxon.current_name_canonical] end taxon.classification_path_id += @normalized_data[taxon.parent_id].classification_path_id + [taxon.id] @@ -231,21 +231,21 @@ ext, fields = *e ingest_synonyms(e) if fields.keys.include? :scientificname ingest_vernaculars(e) if fields.keys.include? :vernacularname end end - + def ingest_synonyms(extension) DarwinCore.logger_write(@dwc.object_id, "Ingesting synonyms extension") ext, fields = *extension ext.read do |rows| rows[0].each do |r| set_scientific_name(r, fields) synonym = SynonymNormalized.new( nil, - r[fields[:scientificname]], - r[fields[:canonicalname]], + r[fields[:scientificname]], + r[fields[:canonicalname]], fields[:taxonomicstatus] ? r[fields[:taxonomicstatus]] : nil) @normalized_data[r[fields[:id]]].synonyms << synonym add_name_string(synonym.name) add_name_string(synonym.canonical_name) end @@ -260,18 +260,21 @@ language = nil if fields[:language] language = r[fields[:language]] elsif fields[:languagecode] - language = r[fields[:languagecode]] + language = r[fields[:languagecode]] end - + locality = fields[:locality] ? r[fields[:locality]] : nil - + + country_code = fields[:countrycode] ? r[fields[:countrycode]] : nil + vernacular = VernacularNormalized.new( r[fields[:vernacularname]], language, - locality) + locality, + country_code) @normalized_data[r[fields[:id]]].vernacular_names << vernacular add_vernacular_name_string(vernacular.name) end end end