lib/dwc-archive/classification_normalizer.rb in dwc-archive-0.5.18 vs lib/dwc-archive/classification_normalizer.rb in dwc-archive-0.6.0

- old
+ new

@@ -87,15 +87,31 @@ add_name_string(synonym.canonical_name) end def set_scientific_name(row, fields) row[fields[:scientificname]] = 'N/A' unless row[fields[:scientificname]] - canonical_name = get_canonical_name(row[fields[:scientificname]]) + canonical_name = '' + scientific_name = row[fields[:scientificname]].strip + if separate_canonical_and_authorship?(row, fields) + canonical_name = row[fields[:scientificname]].strip + scientific_name += " #{row[fields[:scientificnameauthorship]].strip}" + else + canonical_name = get_canonical_name(row[fields[:scientificname]]) + end fields[:canonicalname] = row.size row << canonical_name - scientific_name = row[fields[:scientificname]].strip row[fields[:scientificname]] = scientific_name end + + def separate_canonical_and_authorship?(row, fields) + authorship = '' + if fields[:scientificnameauthorship] + authorship = row[fields[:scientificnameauthorship]].to_s.strip + end + !(authorship.empty? || row[fields[:scientificname]].index(authorship)) + end + + def ingest_core raise RuntimeError, "Darwin Core core fields must contain taxon id and scientific name" unless (@core_fields[:id] && @core_fields[:scientificname]) @dwc.core.read do |rows| rows[0].each do |r|