lib/dwc-archive/classification_normalizer.rb in dwc-archive-0.5.18 vs lib/dwc-archive/classification_normalizer.rb in dwc-archive-0.6.0
- old
+ new
@@ -87,15 +87,31 @@
add_name_string(synonym.canonical_name)
end
# Normalizes the scientific name cell of +row+ and appends a canonical name
# column to the row.
#
# row    - Array of cell values for one record; modified in place.
# fields - Hash mapping field-name symbols to column indexes in +row+;
#          :canonicalname is set to the index of the appended column.
#
# Returns the scientific name that was stored back into the row.
def set_scientific_name(row, fields)
  name_index = fields[:scientificname]
  # A missing scientific name is replaced by the 'N/A' placeholder.
  row[name_index] = 'N/A' unless row[name_index]
  raw_name = row[name_index]
  scientific_name = raw_name.strip

  if separate_canonical_and_authorship?(row, fields)
    # Authorship is supplied in its own column and is not part of the name
    # cell: the stripped cell is used as the canonical name, and the full
    # scientific name is built by appending the authorship.
    canonical_name = scientific_name
    authorship = row[fields[:scientificnameauthorship]].strip
    scientific_name = "#{scientific_name} #{authorship}"
  else
    # The unstripped cell value is handed to get_canonical_name.
    canonical_name = get_canonical_name(raw_name)
  end

  # The canonical name becomes a new last column; remember its index.
  fields[:canonicalname] = row.size
  row << canonical_name
  row[name_index] = scientific_name
end
+
# Tells whether the authorship is supplied in its own column and is not
# already contained in the scientific name cell.
#
# row    - Array of cell values for one record.
# fields - Hash mapping field-name symbols to column indexes in +row+.
#
# Returns true when a non-blank authorship exists that does not occur in the
# scientific name cell, false otherwise.
def separate_canonical_and_authorship?(row, fields)
  authorship_index = fields[:scientificnameauthorship]
  return false unless authorship_index

  authorship = row[authorship_index].to_s.strip
  return false if authorship.empty?

  # to_s keeps a nil name cell from raising NoMethodError.
  !row[fields[:scientificname]].to_s.include?(authorship)
end
+
+
def ingest_core
raise RuntimeError, "Darwin Core core fields must contain taxon id and scientific name" unless (@core_fields[:id] && @core_fields[:scientificname])
@dwc.core.read do |rows|
rows[0].each do |r|