# encoding: UTF-8
#
# Treetop grammar for "clean" scientific names.
#
# Conventions used by the semantic blocks in this grammar:
#   value     - normalized string form of the matched text
#   canonical - name string without authorship (defined on name-bearing nodes)
#   details   - Hash describing the parsed pieces
# Rank-like nodes (rank, rank_forma, taxon_concept_rank, genus_rank) instead
# expose apply(a), canonical(a) and details(a), which take the node that
# follows the rank marker.
grammar ScientificNameClean

  # Top-level rule: a hybrid formula "A x B", an incomplete hybrid formula
  # "A x" optionally followed by "?", or a single scientific name.
  rule composite_scientific_name
    a:scientific_name space hybrid_separator space b:scientific_name space {
      def value
        a.value + " × " + b.value
      end

      def canonical
        a.canonical + " × " + b.canonical
      end

      def details
        {:hybrid => {:scientific_name1 => a.details, :scientific_name2 => b.details}}
      end
    }
    /
    # Second parent missing: canonical is the first parent only.
    a:scientific_name space hybrid_separator space [\?]? {
      def value
        a.value + " × ?"
      end

      def canonical
        a.canonical
      end

      def details
        {:hybrid => {:scientific_name1 => a.details, :scientific_name2 => "?"}}
      end
    }
    /
    scientific_name
  end

  # A name part optionally followed by authorship, a taxon concept ("sec."),
  # a status phrase, or a bare year. Alternatives are ordered from most to
  # least specific.
  rule scientific_name
    name_part_authors_mix
    /
    # name + authors + "sec." + authors
    # NOTE(review): c.apply(d) already starts with a space, so value contains
    # two consecutive spaces before "sec." - confirm callers normalize this.
    space a:name_part space b:authors_part space c:taxon_concept_rank space d:authors_part space {
      def value
        a.value + " " + b.value + " " + c.apply(d)
      end

      def canonical
        a.canonical
      end

      def details
        a.details.merge(b.details).merge(c.details(d)).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value + " " + d.text_value).gsub(/\s{2,}/, ' ').strip})
      end
    }
    /
    # name + "sec." + authors
    # NOTE(review): same double space before "sec." as in the alternative above.
    space a:name_part space b:taxon_concept_rank space c:authors_part space {
      def value
        a.value + " " + b.apply(c)
      end

      def canonical
        a.canonical
      end

      def details
        a.details.merge(b.details(c)).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value).gsub(/\s{2,}/, ' ').strip})
      end
    }
    /
    # name + authors + status words
    space a:name_part space b:authors_part space c:status_part space {
      def value
        a.value + " " + b.value + " " + c.value
      end

      def canonical
        a.canonical
      end

      def details
        a.details.merge(b.details).merge(c.details).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value).gsub(/\s{2,}/, ' ').strip})
      end
    }
    /
    # name + authors
    space a:name_part space b:authors_part space {
      def value
        a.value + " " + b.value
      end

      def canonical
        a.canonical
      end

      def details
        # strip added so that auth_part_verbatim is trimmed the same way as in
        # the sibling alternatives; author_name may capture trailing whitespace.
        a.details.merge(b.details).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => b.text_value.gsub(/\s{2,}/, ' ').strip})
      end
    }
    /
    # name + bare year (no author); flagged as not valid
    space a:name_part space b:year space {
      def value
        a.value + " " + b.value
      end

      def canonical
        a.canonical
      end

      def details
        a.details.merge(b.details).merge({:is_valid => false}).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => b.text_value.gsub(/\s{2,}/, ' ').strip})
      end
    }
    /
    name_part
  end

  # One or more status words; details reports the whole phrase under :status.
  rule status_part
    a:status_word space b:status_part {
      def value
        a.value + " " + b.value
      end

      def details
        {:status => value}
      end
    }
    /
    status_word
  end

  # A latin word, optionally terminated by a period.
  rule status_word
    latin_word [\.] {
      def value
        text_value.strip
      end

      def details
        {:status => value}
      end
    }
    /
    latin_word
  end

  # Names where authorship is interleaved with the name:
  # species + authors + ranked infraspecific epithet (+ authors).
  rule name_part_authors_mix
    a:species_name space b:authors_part space c:subspecies_name space d:authors_part {
      def value
        (a.value + " " + b.value + " " + c.value + " " + d.value).gsub(/\s+/,' ')
      end

      def canonical
        (a.canonical + " " + c.canonical).gsub(/\s+/,' ')
      end

      def details
        a.details.merge(c.details).merge({:species_authors=>b.details, :subspecies_authors => d.details}).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value + " " + d.text_value).gsub(/\s{2,}/, ' ')})
      end
    }
    /
    a:species_name space b:authors_part space c:subspecies_name {
      def value
        (a.value + " " + b.value + " " + c.value).gsub(/\s+/,' ')
      end

      def canonical
        (a.canonical + " " + c.canonical).gsub(/\s+/,' ')
      end

      def details
        a.details.merge(c.details).merge({:species_authors=>b.details}).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value).gsub(/\s{2,}/, ' ')})
      end
    }
  end

  # Full authorship: combinations of bracketed (original) authors,
  # "ex"/"in" constructs and plain author lists.
  rule authors_part
    a:original_authors_revised_name space b:authors_revised_name {
      def value
        a.value + " " + b.value
      end

      def details
        a.details.merge(b.details)
      end
    }
    /
    a:simple_authors_part space ex_sep space b:simple_authors_part {
      def value
        a.value + " ex " + b.value
      end

      def details
        {:revised_name_authors => {:revised_authors => a.details[:authors], :authors => b.details[:authors]}}
      end
    }
    /
    a:original_authors_revised_name space b:authors_names_full {
      def value
        a.value + " " + b.value
      end

      def details
        a.details.merge(b.details)
      end
    }
    /
    authors_revised_name
    /
    original_authors_revised_name
    /
    simple_authors_part
  end

  # Bracketed original authors followed by combination authors, or either alone.
  rule simple_authors_part
    a:original_authors_names_full space b:authors_names_full {
      def value
        a.value + " " + b.value
      end

      def details
        a.details.merge(b.details)
      end
    }
    /
    original_authors_names_full
    /
    authors_names_full
  end

  # Original authors in brackets. value always uses round brackets, even when
  # the input used square brackets.
  rule original_authors_names_full
    # "(Authors) year" - the year outside the brackets is moved inside in value
    left_bracket space a:authors_names space right_bracket space [,]? space b:year {
      def value
        "(" + a.value + " " + b.value + ")"
      end

      def details
        {:orig_authors => a.details[:authors], :year => b.details[:year]}
      end
    }
    /
    left_bracket space a:authors_names_full space right_bracket {
      def value
        "(" + a.value + ")"
      end

      def details
        {:orig_authors => a.details[:authors]}
      end
    }
    /
    "[" space a:authors_names_full space "]" {
      def value
        "(" + a.value + ")"
      end

      def details
        {:orig_authors => a.details[:authors]}
      end
    }
    /
    left_bracket space a:unknown_auth space right_bracket {
      def value
        "(" + a.value + ")"
      end

      def details
        {:orig_authors => a.details[:authors]}
      end
    }
    /
    left_bracket space "?" space right_bracket {
      def value
        "(?)"
      end

      def details
        {:orig_authors => "unknown"}
      end
    }
  end

  # Bracketed "A ex B" authorship.
  rule original_authors_revised_name
    left_bracket space a:authors_revised_name space right_bracket {
      def value
        "(" + a.value + ")"
      end

      def details
        {:original_revised_name_authors => a.details[:revised_name_authors]}
      end
    }
  end

  # "A ex B" / "A in B"; value always renders the separator as "ex".
  rule authors_revised_name
    a:authors_names_full space ex_sep space b:authors_names_full {
      def value
        a.value + " ex " + b.value
      end

      def details
        {:revised_name_authors =>{:revised_authors => a.details[:authors], :authors => b.details[:authors]}}
      end
    }
  end

  # Author list with an optional year (an optional comma before the year is dropped).
  rule authors_names_full
    a:authors_names space [,]? space b:year {
      def value
        a.value + " " + b.value
      end

      def details
        {:authors => {:names => a.details[:authors][:names]}.merge(b.details)}
      end
    }
    /
    authors_names
    /
    unknown_auth
  end

  # Placeholders used instead of a real author; reported as "unknown".
  rule unknown_auth
    ("auct."/"hort."/"anon."/"ht.") {
      def value
        text_value
      end

      def details
        {:authors => "unknown"}
      end
    }
  end

  rule ex_sep
    ("ex"/"in")
  end

  # One or more author names joined by separators.
  rule authors_names
    a:author_name space sep:author_name_separator space b:authors_names {
      def value
        sep.apply(a,b)
      end

      def details
        sep.details(a,b)
      end
    }
    /
    author_name
  end

  # Separator between authors. "&", "and" and "et" are all rendered as " et";
  # a comma is kept as is.
  rule author_name_separator
    ("&"/","/"and"/"et") {
      def apply(a,b)
        sep = text_value.strip
        sep = " et" if ["&","and","et"].include? sep
        a.value + sep + " " + b.value
      end

      def details(a,b)
        {:authors => {:names => a.details[:authors][:names] + b.details[:authors][:names]}}
      end
    }
  end

  # A single author made of one or more words; multi-word names are reported
  # as one entry in :names.
  rule author_name
    space a:author_word space b:author_name space {
      def value
        a.value + " " + b.value
      end

      def details
        {:authors => {:names => [value]}}
      end
    }
    /
    author_word
  end

  # One word of an author name: a hard-coded special case, a known lowercase
  # particle/abbreviation, or a capitalized word.
  rule author_word
    "A S. Xu" {
      def value
        text_value.strip
      end

      def details
        {:authors => {:names => [value]}}
      end
    }
    /
    ("anon."/"f."/"bis"/"arg."/"da"/"der"/"den"/"de"/"du"/"la"/"ter"/"van"/"et al.\{\?\}"/"et al.") {
      def value
        text_value.strip
      end

      def details
        {:authors => {:names => [value]}}
      end
    }
    /
    # Capital letter followed by anything except digits, brackets, whitespace,
    # ampersand or comma.
    ("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-Z]) [^0-9()\s&,]+ {
      def value
        text_value.gsub(/\s+/, " ").strip
      end

      def details
        {:authors => {:names => [value]}}
      end
    }
  end

  # The name proper (no authorship): species with editorial markup, species
  # with ranked infraspecific epithets, species with an unranked third word,
  # a plain species name, or a uninomial.
  rule name_part
    space a:species_name space b:rank space_hard c:editorials_full {
      def value
        a.value + " " + b.value + " " + c.value
      end

      def canonical
        a.canonical
      end

      def details
        a.details.merge(b.details).merge(c.details)
      end
    }
    /
    # b.value and b.canonical already start with a space (see rank.apply).
    space a:species_name space b:subspecies_names {
      def value
        a.value + b.value
      end

      def canonical
        a.canonical + b.canonical
      end

      def details
        a.details.merge(b.details)
      end
    }
    /
    # Unranked infraspecific epithet; must not be followed by a period.
    # NOTE(review): :subspecies is a Hash here but an Array in rank.details -
    # confirm consumers handle both shapes.
    space a:species_name space b:latin_word ![\.] {
      def value
        a.value + " " + b.value
      end

      def canonical
        value
      end

      def details
        a.details.merge({:subspecies => {:rank => "n/a", :value =>b.value}})
      end
    }
    /
    species_name
    /
    cap_latin_word
  end

  # Two or more ranked infraspecific epithets in a row; such names are
  # flagged with :is_valid => false.
  rule subspecies_names
    a:subspecies_name space b:subspecies_names {
      def value
        a.value + b.value
      end

      def canonical
        a.canonical + b.canonical
      end

      def details
        # b is either a subspecies_name node or another node of this
        # alternative. Only subspecies_name defines details_subspecies, so
        # read the list through details, which both kinds of node provide.
        c = a.details[:subspecies] + b.details[:subspecies]
        a.details.merge({:subspecies => c, :is_valid => false})
      end
    }
    /
    subspecies_name
  end

  # Rank marker followed by an epithet; all work is delegated to the rank node.
  rule subspecies_name
    sel:rank space_hard a:latin_word {
      def value
        sel.apply(a)
      end

      def canonical
        sel.canonical(a)
      end

      def details
        sel.details(a)
      end

      def details_subspecies
        details[:subspecies]
      end
    }
  end

  # Parenthesized editorial markup made of rank markers.
  rule editorials_full
    "(" space a:editorials space ")" {
      def value
        "(" + a.value + ")"
      end

      def details
        {:editorial_markup => value, :is_valid => false}
      end
    }
  end

  # One or more rank markers, optionally joined by "&"; value concatenates
  # the markers without any separator.
  rule editorials
    space a:rank space [&]? space b:editorials {
      def value
        a.value + b.value
      end

      def details
        {:editorial_markup => value, :is_valid => false}
      end
    }
    /
    rank
  end

  # Infraspecific rank markers. Longer literals that share a prefix are listed
  # before shorter ones ("ββ" before "β", "****" before "**" before "*")
  # because PEG choice takes the first match.
  rule rank
    ("morph."/"f.sp."/"B"/"ssp."/"mut."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"subsp."/"subf."/"race"/"α" /"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*") {
      def value
        text_value.strip
      end

      # Returns " <rank> <epithet>", with a leading space.
      def apply(a)
        " " + text_value + " " + a.value
      end

      # Returns " <epithet>", with a leading space and without the rank.
      def canonical(a)
        " " + a.value
      end

      # a may be nil (default), in which case :value is nil.
      def details(a = nil)
        {:subspecies => [{:rank => text_value, :value => (a.value rescue nil)}]}
      end
    }
    /
    rank_forma
  end

  # All spellings of "forma" are normalized to "f.".
  rule rank_forma
    ("forma"/"form."/"fo."/"f.") {
      def value
        "f."
      end

      def apply(a)
        " " + value + " " + a.value
      end

      def canonical(a)
        " " + a.value
      end

      def details(a = nil)
        {:subspecies => [{:rank => value, :value => (a.value rescue nil)}]}
      end
    }
  end

  # Genus + species epithet, with optional hybrid sign or subgenus.
  # :cross records where the hybrid sign appeared ('before' or 'inside').
  rule species_name
    hybrid_separator space_hard a:cap_latin_word space_hard b:latin_word {
      def value
        "× " + a.value + " " + b.value
      end

      def canonical
        a.value + " " + b.value
      end

      def details
        {:genus => a.value, :species => b.value, :cross => 'before'}
      end
    }
    /
    hybrid_separator space_hard a:cap_latin_word {
      def value
        "× " + a.value
      end

      def canonical
        a.value
      end

      def details
        {:uninomial => a.value, :cross => 'before'}
      end
    }
    /
    a:cap_latin_word space_hard hybrid_separator space_hard b:latin_word {
      def value
        a.value + " × " + b.value
      end

      def canonical
        a.value + " " + b.value
      end

      def details
        {:genus => a.value, :species => b.value, :cross => 'inside'}
      end
    }
    /
    # Genus (Subgenus) species; the subgenus is left out of canonical.
    a:cap_latin_word space b:subgenus space c:latin_word {
      def value
        a.value + " " + b.value + " " + c.value
      end

      def canonical
        a.value + " " + c.value
      end

      def details
        {:genus => a.value, :subgenus => b.details, :species => c.value}
      end
    }
    /
    a:cap_latin_word space_hard b:latin_word {
      def value
        a.value + " " + b.value
      end

      def canonical
        value
      end

      def details
        {:genus => a.value, :species => b.value}
      end
    }
  end

  # Parenthesized subgenus; details is the bare subgenus string, not a Hash.
  rule subgenus
    "(" space a:cap_latin_word space ")" {
      def value
        "(" + a.value + ")"
      end

      def details
        a.value
      end
    }
  end

  # Taxon concept marker "sec."; apply/details take the authors node that follows.
  rule taxon_concept_rank
    "sec." {
      def value
        "sec."
      end

      def apply(a)
        " " + value + " " + a.value
      end

      # NOTE(review): the nil default would raise on a.details; every call in
      # this grammar passes a node.
      def details(a = nil)
        {:taxon_concept => a.details}
      end
    }
  end

  # "subsect."/"subtrib."/"subgen."/"trib."/
  # Infrageneric rank markers. Not referenced by any other rule in this grammar.
  rule genus_rank
    ("subsect."/"subtrib."/"subgen."/"trib.") {
      def value
        text_value.strip
      end

      def apply(a)
        " " + text_value + " " + a.value
      end

      def canonical(a)
        " " + a.value
      end

      def details(a = nil)
        {:subgenus => [{:rank => text_value, :value => (a.value rescue nil)}]}
      end
    }
  end

  # Capitalized latin word (genus or uninomial). A trailing "?" is accepted
  # and dropped from value. Because latin_word needs at least two characters,
  # two-letter genera are listed explicitly in the last alternative.
  rule cap_latin_word
    a:[A-ZŒÆ] b:latin_word "?" {
      def value
        a.text_value + b.value
      end

      def canonical
        value
      end

      def details
        {:uninomial => value}
      end
    }
    /
    a:[A-ZŒÆ] b:latin_word {
      def value
        a.text_value + b.value
      end

      def canonical
        value
      end

      def details
        {:uninomial => value}
      end
    }
    /
    ("Ca"/"Ea"/"Ge"/"Ia"/"Io"/"Ix"/"Lo"/"Oa"/"Ra"/"Ty"/"Ua"/"Aa"/"Ja"/"Zu"/"La"/"Qu"/"As"/"Ba") {
      def value
        text_value
      end

      def canonical
        value
      end

      def details
        {:uninomial => value}
      end
    }
  end

  # Lowercase latin word of at least two characters; ligatures are expanded
  # through the digraph rule.
  rule latin_word
    a:[a-zëüäöïé] b:full_name_letters {
      def value
        a.text_value + b.value
      end

      def details
        {}
      end
    }
    /
    a:digraph b:full_name_letters {
      def value
        a.value + b.value
      end

      def details
        {}
      end
    }
  end

  # Remainder of a latin word: letters and hyphens mixed with ligatures.
  rule full_name_letters
    a:digraph b:full_name_letters {
      def value
        a.value + b.value
      end

      def details
        {}
      end
    }
    /
    a:valid_name_letters b:digraph c:full_name_letters {
      def value
        a.value + b.value + c.value
      end

      def details
        {}
      end
    }
    /
    valid_name_letters
  end

  rule valid_name_letters
    [a-z\-ëüäöïé]+ {
      def value
        text_value
      end

      def details
        {}
      end
    }
  end

  # Ligatures are expanded to two plain letters.
  rule digraph
    [æ] {
      def value
        'ae'
      end
    }
    /
    [œ] {
      def value
        'oe'
      end
    }
  end

  # Hybrid sign in any of its spellings.
  rule hybrid_separator
    ("x"/"X"/"×") {
      def value
        "x"
      end

      def details
        {}
      end
    }
  end

  # Year made of digits and/or "?", optionally followed by a single letter.
  rule year
    year_with_character
    /
    [0-9\?]+ {
      def value
        text_value.strip
      end

      def details
        {:year => value}
      end
    }
  end

  # Year with a trailing letter (for example "1758a"); the letter is dropped
  # from value.
  rule year_with_character
    a:[0-9\?]+ [a-zA-Z] {
      def value
        a.text_value
      end

      def details
        {:year => value}
      end
    }
  end

  # Next two rules only for ( (author) )
  # doesn't touch parenthesis inside another one like (bla-bla-bla1 (bla-bla-bla2))
  rule left_bracket
    "( ("/"(" {
      def value
        "("
      end
    }
  end

  rule right_bracket
    ") )"/")" {
      def value
        ")"
      end
    }
  end

  # Optional whitespace.
  rule space
    [\s]* {
      def details
        {}
      end
    }
  end

  # Mandatory whitespace.
  rule space_hard
    [\s]+ {
      def details
        {}
      end
    }
  end

end