# encoding: UTF-8 grammar ScientificName rule composite_scientific_name a:scientific_name space hybrid_separator space b:scientific_name space { def value a.value + " × " + b.value end def canonical a.canonical + " × " + b.canonical end def details {:hybrid => {:scientific_name1 => a.details, :scientific_name2 => b.details}} end } / a:scientific_name space hybrid_separator space [\?]? { def value a.value + " × ?" end def canonical a.canonical end def details {:hybrid => {:scientific_name1 => a.details, :scientific_name2 => "?"}} end } / scientific_name end rule scientific_name name_part_authors_mix / space a:name_part space b:authors_part space c:status_part space { def value a.value + " " + b.value + " " + c.value end def canonical a.canonical end def details a.details.merge(b.details).merge(c.details) end } / space a:name_part space b:authors_part space { def value a.value + " " + b.value end def canonical a.canonical end def details a.details.merge(b.details) end } / space a:name_part space b:year space { def value a.value + " " + b.value end def canonical a.canonical end def details a.details.merge(b.details).merge({:is_valid => false}) end } / name_part end rule status_part a:status_word space b:status_part { def value a.value + " " + b.value end def details {:status => value} end } / status_word end rule status_word latin_word [\.] { def value text_value.strip end def details {:status => value} end } / latin_word end rule name_part_authors_mix a:species_name space b:authors_part space c:subspecies_name space d:authors_part { def value a.value + " " + b.value + " " + c.value + " " + d.value end def canonical a.canonical + " " + c.canonical end def details a.details.merge(c.details).merge({:species_authors=>b.details, :subspecies_authors => d.details}) end } / a:species_name space b:authors_part space c:subspecies_name { def value a.value + " " + b.value + " " + c.value end def canonical a.canonical + " " + c.canonical end def details a.details.merge(c.details).merge({:species_authors=>b.details}) end } end rule authors_part a:original_authors_revised_name space b:authors_revised_name { def value a.value + " " + b.value end def details a.details.merge(b.details) end } / a:simple_authors_part space ex_sep space b:simple_authors_part { def value a.value + " ex " + b.value end def details {:revised_name_authors => {:revised_authors => a.details[:authors], :authors => b.details[:authors]}} end } / a:original_authors_revised_name space b:authors_names_full { def value a.value + " " + b.value end def details a.details.merge(b.details) end } / authors_revised_name / original_authors_revised_name / simple_authors_part end rule simple_authors_part a:original_authors_names_full space b:authors_names_full { def value a.value + " " + b.value end def details a.details.merge(b.details) end } / original_authors_names_full / authors_names_full end rule original_authors_names_full "(" space a:authors_names space ")" space [,]? space b:year { def value "(" + a.value + " " + b.value + ")" end def details {:orig_authors => a.details[:authors], :year => b.details[:year]} end } / "(" space a:authors_names_full space ")" { def value "(" + a.value + ")" end def details {:orig_authors => a.details[:authors]} end } end rule original_authors_revised_name "(" space a:authors_revised_name space ")" { def value "(" + a.value + ")" end def details {:original_revised_name_authors => a.details[:revised_name_authors]} end } end rule authors_revised_name a:authors_names_full space ex_sep space b:authors_names_full { def value a.value + " ex " + b.value end def details {:revised_name_authors =>{:revised_authors => a.details[:authors], :authors => b.details[:authors]}} end } end rule authors_names_full a:authors_names space [,]? space b:year { def value a.value + " " + b.value end def details {:authors => {:names => a.details[:authors][:names]}.merge(b.details)} end } / authors_names end rule ex_sep ("ex"/"in") end rule authors_names a:author_name space sep:author_name_separator space b:authors_names { def value sep.apply(a,b) end def details sep.details(a,b) end } / author_name end rule author_name_separator ("&"/","/"and"/"et") { def apply(a,b) sep = text_value.strip sep = " et" if ["&","and","et"].include? sep a.value + sep + " " + b.value end def details(a,b) {:authors => {:names => a.details[:authors][:names] + b.details[:authors][:names]}} end } end rule author_name space a:author_word space b:author_name space { def value a.value + " " + b.value end def details {:authors => {:names => [value]}} end } / author_word end rule author_word "A S. Xu" / ("anon."/"f."/"bis"/"arg."/"da"/"der"/"den"/"de"/"du"/"la"/"ter"/"van"/"et al.\{\?\}"/"et al.") { def value text_value.strip end def details {:authors => {:names => [value]}} end } / ("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-Z]) [^0-9()\s&,]+ { def value text_value.gsub(/\s+/, " ").strip end def details {:authors => {:names => [value]}} end } end rule name_part space a:species_name space b:rank space_hard c:editorials_full { def value a.value + " " + b.value + " " + c.value end def canonical a.canonical end def details a.details.merge(b.details).merge(c.details) end } / space a:species_name space b:subspecies_names { def value a.value + b.value end def canonical a.canonical + b.canonical end def details a.details.merge(b.details) end } / space a:species_name space b:latin_word ![\.] { def value a.value + " " + b.value end def canonical value end def details a.details.merge({:subspecies => {:rank => "n/a", :value =>b.value}}) end } / species_name / cap_latin_word end rule subspecies_names a:subspecies_name space b:subspecies_names { def value a.value + b.value end def canonical a.canonical + b.canonical end def details c = a.details[:subspecies] + b.details_subspecies a.details.merge({:subspecies => c, :is_valid => false}) end } / subspecies_name end rule subspecies_name sel:rank space_hard a:latin_word { def value sel.apply(a) end def canonical sel.canonical(a) end def details sel.details(a) end def details_subspecies details[:subspecies] end } end rule editorials_full "(" space a:editorials space ")" { def value "(" + a.value + ")" end def details {:editorial_markup => value, :is_valid => false} end } end rule editorials space a:rank space [&]? space b:editorials { def value a.value + b.value end def details {:editorial_markup => value, :is_valid => false} end } / rank end rule rank ("f.sp."/"f."/"B"/"ssp."/"mut."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"subsp."/"subf."/"race"/"α" /"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*") { def value text_value.strip end def apply(a) " " + text_value + " " + a.value end def canonical(a) " " + a.value end def details(a = nil) {:subspecies => [{:rank => text_value, :value => (a.value rescue nil)}]} end } end rule species_name hybrid_separator space_hard a:cap_latin_word space_hard b:latin_word { def value "× " + a.value + " " + b.value end def canonical a.value + " " + b.value end def details {:genus => a.value, :species => b.value, :cross => 'before'} end } / hybrid_separator space_hard a:cap_latin_word { def value "× " + a.value end def canonical a.value end def details {:uninomial => a.value, :cross => 'before'} end } / a:cap_latin_word space_hard hybrid_separator space_hard b:latin_word { def value a.value + " × " + b.value end def canonical a.value + " " + b.value end def details {:genus => a.value, :species => b.value, :cross => 'inside'} end } / a:cap_latin_word space b:subgenus space c:latin_word { def value a.value + " " + b.value + " " + c.value end def canonical a.value + " " + c.value end def details {:genus => a.value, :subgenus => b.details, :species => c.value} end } / a:cap_latin_word space_hard b:latin_word { def value a.value + " " + b.value end def canonical value end def details {:genus => a.value, :species => b.value} end } end rule subgenus "(" space a:cap_latin_word space ")" { def value "(" + a.value + ")" end def details a.value end } end rule cap_latin_word a:[A-Z] b:latin_word { def value a.text_value + b.value end def canonical value end def details {:uninomial => value} end } end rule latin_word a:[a-zë] b:full_name_letters { def value a.text_value + b.value end } / a:digraph b:full_name_letters { def value a.value + b.value end } end rule full_name_letters a:digraph b:full_name_letters { def value a.value + b.value end } / a:valid_name_letters b:digraph c:full_name_letters { def value a.value + b.value + c.value end } / valid_name_letters end rule valid_name_letters [a-z\-ëüäöï]+ { def value text_value end } end rule digraph [æ] { def value 'ae' end } / [œ] { def value 'oe' end } end rule hybrid_separator ("x"/"X"/"×") { def value "x" end } end rule year year_with_character / [0-9\?]+ { def value text_value.strip end def details {:year => value} end } end rule year_with_character a:[0-9\?]+ [a-zA-Z] { def value a.text_value end def details {:year => value} end } end rule space [\s]* end rule space_hard [\s]+ end end