# encoding: UTF-8

# Treetop grammar for parsing scientific names.
#
# Nodes produced by the rules below expose some or all of these methods:
#   value     - normalized string form of the matched text
#   canonical - the name string without authorship, rank or status parts
#   pos       - Hash of start offset => [label, end offset]
#   hybrid    - true when a hybrid marker was matched
#   details   - nested Hash (or Array of Hashes) describing the parts
grammar ScientificNameClean

  # Entry rule: an optionally space-padded name. Collapses runs of
  # whitespace in value/canonical and always returns details as an Array.
  rule root
    space a:scientific_name_5 space {
      def value
        a.value.gsub(/\s{2,}/, ' ').strip
      end

      def canonical
        a.canonical.gsub(/\s{2,}/, ' ').strip
      end

      def pos
        a.pos
      end

      def hybrid
        a.hybrid
      end

      def details
        a.details.class == Array ? a.details : [a.details]
      end

      def parser_run
        1
      end
    }
  end

  # Hybrid formula "multinomial x species", or a name followed by a
  # taxon-concept marker and an authorship; otherwise scientific_name_4.
  rule scientific_name_5
    a:multinomial_name space_hard hybrid_character space_hard b:species {
      def value
        a.value + " × " + b.value
      end

      def canonical
        a.canonical + " × " + b.canonical
      end

      def pos
        a.pos.merge(b.pos)
      end

      def hybrid
        true
      end

      def details
        # The second part has no genus of its own, so it borrows the first one.
        [a.details, b.details.merge({:genus => a.details[:genus]})]
      end
    }
    /
    a:scientific_name_1 space b:taxon_concept_rank space c:authorship {
      def value
        a.value + " " + b.apply(c)
      end

      def canonical
        a.canonical
      end

      def pos
        a.pos.merge(c.pos)
      end

      def hybrid
        a.hybrid
      end

      def details
        a.details.merge(b.details(c))
      end
    }
    /
    scientific_name_4
  end

  # Hybrid formula of two full names, or a name followed by a hybrid
  # marker and an optional "?"; otherwise scientific_name_3.
  rule scientific_name_4
    a:scientific_name_1 space hybrid_character space b:scientific_name_1 {
      def value
        a.value + " × " + b.value
      end

      def canonical
        a.canonical + " × " + b.canonical
      end

      def pos
        a.pos.merge(b.pos)
      end

      def hybrid
        true
      end

      def details
        [a.details, b.details]
      end
    }
    /
    a:scientific_name_1 space hybrid_character space [\?]? {
      def value
        a.value + " × ?"
      end

      def canonical
        a.canonical
      end

      def pos
        a.pos
      end

      def hybrid
        true
      end

      def details
        [a.details, "?"]
      end
    }
    /
    scientific_name_3
  end

  # Name prefixed by a hybrid marker; otherwise scientific_name_2.
  rule scientific_name_3
    a:hybrid_character space b:scientific_name_2 {
      def value
        a.value + " " + b.value
      end

      def canonical
        b.canonical
      end

      def pos
        b.pos
      end

      def hybrid
        true
      end

      def details
        b.details
      end
    }
    /
    scientific_name_2
  end

  # Name followed by one or more status words; otherwise scientific_name_1.
  rule scientific_name_2
    a:scientific_name_1 space b:status_part {
      def value
        a.value + " " + b.value
      end

      def canonical
        a.canonical
      end

      def pos
        a.pos
      end

      def hybrid
        a.hybrid rescue false
      end

      def details
        a.details.merge(b.details)
      end
    }
    /
    scientific_name_1
  end

  # A multinomial name, falling back to a uninomial one.
  rule scientific_name_1
    multinomial_name
    /
    uninomial_name
  end

  # One or more status words joined by single spaces.
  rule status_part
    a:status_word space b:status_part {
      def value
        a.value + " " + b.value
      end

      def details
        {:status => value}
      end
    }
    /
    status_word
  end

  # A lower-case word terminated by a period.
  rule status_word
    latin_word [\.] {
      def value
        text_value.strip
      end

      def details
        {:status => value}
      end
    }
    #/
    #latin_word
  end

  # Genus, optional infragenus, optional species prefix, species and
  # optional infraspecific parts. The infragenus is kept in value but
  # left out of canonical.
  rule multinomial_name
    a:genus space b:infragenus space species_prefix? space c:species space_hard d:infraspecies_mult {
      def value
        a.value + " " + b.value + " " + c.value + " " + d.value
      end

      def canonical
        a.canonical + " " + c.canonical + " " + d.canonical
      end

      def pos
        a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
      end

      def hybrid
        c.hybrid rescue false
      end

      def details
        a.details.merge(b.details).merge(c.details).merge(d.details)
      end
    }
    /
    a:genus space b:infragenus space species_prefix? space c:species {
      def value
        a.value + " " + b.value + " " + c.value
      end

      def canonical
        a.canonical + " " + c.canonical
      end

      def pos
        a.pos.merge(b.pos).merge(c.pos)
      end

      def hybrid
        c.hybrid rescue false
      end

      def details
        a.details.merge(b.details).merge(c.details)
      end
    }
    /
    a:genus space species_prefix? space b:species space_hard c:infraspecies_mult {
      def value
        a.value + " " + b.value + " " + c.value
      end

      def canonical
        a.canonical + " " + b.canonical + " " + c.canonical
      end

      def pos
        a.pos.merge(b.pos).merge(c.pos)
      end

      def hybrid
        b.hybrid rescue false
      end

      def details
        a.details.merge(b.details).merge(c.details)
      end
    }
    /
    a:genus space species_prefix? space b:species {
      def value
        a.value + " " + b.value
      end

      def canonical
        a.canonical + " " + b.canonical
      end

      def pos
        a.pos.merge(b.pos)
      end

      def hybrid
        b.hybrid rescue false
      end

      def details
        a.details.merge(b.details)
      end
    }
  end

  # One or more infraspecies parts; details[:infraspecies] is always an Array.
  rule infraspecies_mult
    a:infraspecies space b:infraspecies_mult {
      def value
        a.value + " " + b.value
      end

      def canonical
        a.canonical + " " + b.canonical
      end

      def pos
        a.pos.merge(b.pos)
      end

      def details
        a_array = a.details[:infraspecies].class == Array ? a.details[:infraspecies] : [a.details[:infraspecies]]
        b_array = b.details[:infraspecies].class == Array ? b.details[:infraspecies] : [b.details[:infraspecies]]
        a.details.merge({:infraspecies => a_array + b_array})
      end
    }
    /
    infraspecies {
      def details
        {:infraspecies => [super[:infraspecies]]}
      end
    }
  end

  # An infraspecies string with an optional authorship.
  rule infraspecies
    a:infraspecies_string space b:authorship {
      def value
        a.value + " " + b.value
      end

      def canonical
        a.canonical
      end

      def pos
        a.pos.merge(b.pos)
      end

      def details
        {:infraspecies => a.details[:infraspecies].merge(b.details)}
      end
    }
    /
    infraspecies_string
  end

  # A rank marker followed by an epithet, or a bare epithet that is not
  # followed by a period (reported with rank 'n/a').
  rule infraspecies_string
    sel:rank space_hard a:species_word {
      def value
        sel.apply(a)
      end

      def canonical
        sel.canonical(a)
      end

      def pos
        sel.pos(a)
      end

      def details
        sel.details(a)
      end
    }
    /
    species_word ![\.] {
      def value
        text_value
      end

      def canonical
        value
      end

      def pos
        {interval.begin => ['infraspecies', interval.end]}
      end

      def details
        {:infraspecies => {:string => value, :rank => 'n/a'}}
      end
    }
  end

  # Taxon-concept marker; both accepted spellings are reported as "sec.".
  rule taxon_concept_rank
    ("sec."/"sensu.") {
      def value
        "sec."
      end

      def apply(a)
        " " + value + " " + a.value
      end

      def details(a = nil)
        {:taxon_concept => a.details}
      end
    }
  end

  # Infraspecific rank markers. The helpers take the epithet node that
  # follows the marker.
  rule rank
    ("morph."/"f.sp."/"B"/"ssp."/"mut."/"nat"/"nothosubsp."/"convar."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"var"/"subsp."/"subsp"/"subf."/"race"/"α" /"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*") {
      def value
        text_value.strip
      end

      def apply(a)
        " " + text_value + " " + a.value
      end

      def canonical(a)
        " " + a.value
      end

      def pos(a)
        {interval.begin => ['infraspecific_type', interval.end], a.interval.begin => ['infraspecies', a.interval.end]}
      end

      def details(a = nil)
        {:infraspecies => {:string => (a.value rescue nil), :rank => text_value}}
      end
    }
    /
    rank_forma
  end

  # Spellings of "forma"; all of them are reported as "f.".
  rule rank_forma
    ("forma"/"form."/"form"/"fo."/"f.") {
      def value
        "f."
      end

      def apply(a)
        " " + value + " " + a.value
      end

      def canonical(a)
        " " + a.value
      end

      def pos(a)
        {interval.begin => ['infraspecific_type', interval.end], a.interval.begin => ['infraspecies', a.interval.end]}
      end

      def details(a = nil)
        {:infraspecies => {:string => (a.value rescue nil), :rank => value}}
      end
    }
  end

  # A species epithet with an optional authorship.
  rule species
    a:species_string space b:authorship {
      def value
        a.value + " " + b.value
      end

      def canonical
        a.canonical
      end

      def hybrid
        a.hybrid rescue false
      end

      def pos
        a.pos.merge(b.pos)
      end

      def details
        {:species => a.details[:species].merge(b.details)}
      end
    }
    /
    species_string
  end

  # A plain species epithet, or one prefixed by a hybrid marker.
  rule species_string
    # a:species_word &(space_hard author_prefix_word space_hard) {
    #   def value
    #     a.value
    #   end
    #
    #   def canonical
    #     a.value
    #   end
    #
    #   def hybrid
    #     a.hybrid rescue false
    #   end
    #
    #   def pos
    #     {a.interval.begin => ['species', a.interval.end]}
    #   end
    #
    #   def details
    #     {:species => {:string => a.value}}
    #   end
    # }
    # /
    species_word {
      def canonical
        value
      end

      def pos
        {interval.begin => ['species', interval.end]}
      end

      def hybrid
        false
      end

      def details
        {:species => {:string => value}}
      end
    }
    /
    species_word_hybrid
  end

  # A parenthesised capitalized word or single capital letter with a period.
  rule infragenus
    left_paren space a:(cap_latin_word/capped_dotted_char) space right_paren {
      def value
        "(" + a.value + ")"
      end

      def canonical
        a.value
      end

      def pos
        {a.interval.begin => ['infragenus', a.interval.end]}
      end

      def details
        {:infragenus => {:string => a.value}}
      end
    }
  end

  # A capitalized word that is not followed by an author prefix word
  # plus an author word.
  rule genus
    a:uninomial_string !(space_hard author_prefix_word space_hard author_word) {
      def value
        a.value
      end

      def pos
        {a.interval.begin => ['genus', a.interval.end]}
      end

      def canonical
        a.value
      end

      def details
        {:genus => {:string => a.value}}
      end
    }
  end

  # A single capitalized name with optional infragenus and/or authorship.
  rule uninomial_name
    a:uninomial_string space b:infragenus space c:simple_authorship {
      def value
        a.value + " " + b.value + " " + c.value
      end

      def canonical
        a.canonical
      end

      def pos
        a.pos.merge(b.pos).merge(c.pos)
      end

      def hybrid
        false
      end

      def details
        {:uninomial => a.details[:uninomial].merge(b.details).merge(c.details)}
      end
    }
    /
    a:uninomial_string space b:infragenus {
      def value
        a.value + " " + b.value
      end

      def canonical
        a.canonical
      end

      def pos
        a.pos.merge(b.pos)
      end

      def hybrid
        false
      end

      def details
        {:uninomial => a.details[:uninomial].merge(b.details)}
      end
    }
    /
    a:uninomial_string space_hard b:authorship {
      def value
        a.value + " " + b.value
      end

      def canonical
        a.canonical
      end

      def pos
        a.pos.merge(b.pos)
      end

      def hybrid
        false
      end

      def details
        {:uninomial => a.details[:uninomial].merge(b.details)}
      end
    }
    /
    uninomial_string
  end

  # A capitalized word, or a hyphenated pair of capitalized words.
  rule uninomial_string
    (cap_latin_word_pair/cap_latin_word) {
      def canonical
        value
      end

      def pos
        {interval.begin => ['uninomial', interval.end]}
      end

      def hybrid
        false
      end

      def details
        {:uninomial => {:string => value}}
      end
    }
  end

  # Authorship: an optional parenthesised basionym part, an optional
  # combination part, and an optional "ex"/"in" part.
  rule authorship
    a:basionym_authorship_with_parenthesis space b:simple_authorship ","? space c:ex_authorship {
      def value
        a.value + " " + b.value + " " + c.value
      end

      def pos
        a.pos.merge(b.pos).merge(c.pos)
      end

      def details
        val = {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
        val[:combinationAuthorTeam].merge!(c.details)
        val
      end
    }
    /
    a:basionym_authorship_with_parenthesis space b:simple_authorship {
      def value
        a.value + " " + b.value
      end

      def pos
        a.pos.merge(b.pos)
      end

      def details
        {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
      end
    }
    /
    basionym_authorship_with_parenthesis
    /
    a:simple_authorship ","? space b:ex_authorship {
      def value
        a.value + " " + b.value
      end

      def pos
        a.pos.merge(b.pos)
      end

      def details
        val = a.details
        val[:authorship] = text_value.strip
        val[:basionymAuthorTeam].merge!(b.details)
        val
      end
    }
    /
    simple_authorship
  end

  # Authorship written inside parentheses.
  rule basionym_authorship_with_parenthesis
    left_paren space a:authors_names space right_paren space [,]? space b:year {
      def value
        "(" + a.value + " " + b.value + ")"
      end

      def pos
        a.pos.merge(b.pos)
      end

      def details
        # NOTE(review): this alternative uses the key :author_team while every
        # other rule uses :authorTeam. Left unchanged because callers may read
        # this key; confirm before unifying.
        { :authorship => text_value,
          :basionymAuthorTeam => {:author_team => text_value}.merge(a.details).merge(b.details)
        }
      end
    }
    /
    left_paren space a:simple_authorship ","? space b:ex_authorship space right_paren {
      def value
        "(" + a.value + " " + b.value + ")"
      end

      def pos
        a.pos.merge(b.pos)
      end

      def details
        val = a.details
        val[:basionymAuthorTeam].merge!(b.details)
        val[:authorship] = text_value.strip
        val
      end
    }
    /
    left_paren space a:simple_authorship space right_paren {
      def value
        "(" + a.value + ")"
      end

      def pos
        a.pos
      end

      def details
        val = a.details
        val[:authorship] = text_value
        val
      end
    }
    /
    left_paren space a:"?" space right_paren {
      def value
        "(?)"
      end

      def pos
        {a.interval.begin => ['unknown_author', a.interval.end]}
      end

      def details
        {:authorship => text_value, :basionymAuthorTeam => {:authorTeam => text_value, :author => ['?']}}
      end
    }
  end

  # An "ex"/"in" separator followed by an authorship; value always uses " ex ".
  rule ex_authorship
    ex_sep space b:simple_authorship {
      def value
        " ex " + b.value
      end

      def pos
        b.pos
      end

      def details
        val = {:exAuthorTeam => {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])}
        val
      end
    }
  end

  # Author names with an optional year. The first alternative also consumes
  # a trailing "non <authors> <year>" clause, which is not included in
  # value, pos or the author team.
  rule simple_authorship
    a:authors_names space [,]? space b:year? [,]? space "non" space authors_names space [,]? space year {
      # The year is optional here. When it is absent, b is an empty node
      # without value/pos/details, so every use of b is guarded.
      def value
        b.empty? ? a.value : a.value + " " + b.value
      end

      def pos
        b.empty? ? a.pos : a.pos.merge(b.pos)
      end

      def details
        details_with_arg(:basionymAuthorTeam)
      end

      def details_with_arg(authorTeamType = 'basionymAuthorTeam')
        team = { :authorTeam => a.text_value.strip }.merge(a.details)
        team = team.merge(b.details) unless b.empty?
        { :authorship => text_value, authorTeamType.to_sym => team }
      end
    }
    /
    a:authors_names space [,]? space b:year {
      def value
        a.value + " " + b.value
      end

      def pos
        a.pos.merge(b.pos)
      end

      def details
        details_with_arg(:basionymAuthorTeam)
      end

      def details_with_arg(authorTeamType = 'basionymAuthorTeam')
        { :authorship => text_value,
          authorTeamType.to_sym => {
            :authorTeam => a.text_value.strip
          }.merge(a.details).merge(b.details)
        }
      end
    }
    /
    authors_names {
      def details
        details = details_with_arg(:basionymAuthorTeam)
        details[:basionymAuthorTeam].merge!(super)
        details
      end

      def details_with_arg(authorTeamType = 'basionymAuthorTeam')
        { :authorship => text_value,
          authorTeamType.to_sym => {
            :authorTeam => text_value,
          }
        }
      end
    }
  end

  # One or more author names joined by separators, or an unknown-author token.
  rule authors_names
    a:author_name space sep:author_separator space b:authors_names {
      def value
        sep.apply(a,b)
      end

      def pos
        sep.pos(a,b)
      end

      def details
        sep.details(a,b)
      end
    }
    /
    author_name
    /
    unknown_auth
  end

  # Tokens standing for an unknown author; details report "unknown".
  rule unknown_auth
    ("auct."/"auct"/"hort."/"hort"/"anon."/"anon"/"ht."/"ht") {
      def value
        text_value
      end

      def pos
        {interval.begin => ['unknown_author', interval.end]}
      end

      def details
        {:author => ["unknown"]}
      end
    }
  end

  # "ex" or "in", which must be followed by whitespace (not consumed).
  rule ex_sep
    ("ex"/"in") &[\s]
  end

  # Separator between authors; "&", "and" and "et" are all rendered as " et".
  rule author_separator
    ("&"/","/"and"/"et") {
      def apply(a,b)
        sep = text_value.strip
        sep = " et" if ["&","and","et"].include? sep
        a.value + sep + " " + b.value
      end

      def pos(a,b)
        a.pos.merge(b.pos)
      end

      def details(a,b)
        {:author => a.details[:author] + b.details[:author]}
      end
    }
  end

  # An author name with an optional postfix word that is not followed by
  # a latin word.
  rule author_name
    space a:author_name_without_postfix space b:author_postfix_word space !latin_word {
      def value
        a.value + ' ' + b.value
      end

      def pos
        a.pos.merge(b.pos)
      end

      def details
        {:author => [value]}
      end
    }
    /
    author_name_without_postfix
  end

  # A sequence of author words, optionally starting with a prefix word.
  rule author_name_without_postfix
    space a:author_prefix_word space b:author_name {
      def value
        a.value + " " + b.value
      end

      def pos
        a.pos.merge(b.pos)
      end

      def details
        {:author => [value]}
      end
    }
    /
    a:author_word space b:author_name {
      def value
        a.value + " " + b.value
      end

      def pos
        a.pos.merge(b.pos)
      end

      def details
        {:author => [value]}
      end
    }
    /
    author_word
  end

  # A single word of an author name, plus a few literal special cases.
  rule author_word
    "A S. Xu" {
      def value
        text_value.strip
      end

      def pos
        # Three words: "A" at +0..+1, "S." at +2..+4, "Xu" at +5..+7.
        # The second element is an absolute end offset, as in every other
        # pos hash of this grammar.
        {interval.begin => ['author_word', interval.begin + 1], (interval.begin + 2) => ['author_word', interval.begin + 4], (interval.begin + 5) => ['author_word', interval.end]}
      end

      def details
        {:author => [value]}
      end
    }
    /
    ("arg."/"et al.\{\?\}"/"et al."/"et al") {
      def value
        text_value.strip
      end

      def pos
        #cheating because there are several words in some of them
        {interval.begin => ['author_word', interval.end]}
      end

      def details
        {:author => [value]}
      end
    }
    /
    ("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-W]/[Y-Z]) [^0-9\[\]\(\)\s&,]* {
      def value
        text_value
      end

      def pos
        {interval.begin => ['author_word', interval.end]}
      end

      def details
        {:author => [value]}
      end
    }
    /
    "X" [^0-9\[\]\(\)\s&,]+ {
      def value
        text_value
      end

      def pos
        {interval.begin => ['author_word', interval.end]}
      end

      def details
        {:author => [value]}
      end
    }
    /
    author_prefix_word
  end

  # Lower-case name particles; must be followed by whitespace (not consumed).
  rule author_prefix_word
    space ("ab"/"bis"/"da"/"der"/"des"/"den"/"della"/"dela"/"de"/"di"/"du"/"la"/"ter"/"van"/"von") &space_hard {
      def value
        text_value
      end

      def pos
        #cheating because there are several words in some of them
        {interval.begin => ['author_word', interval.end]}
      end
    }
  end

  # Postfix attached to an author name.
  rule author_postfix_word
    ("f."/"filius") {
      def value
        text_value.strip
      end

      def pos
        {interval.begin => ['author_word', interval.end]}
      end
    }
  end

  # Two capitalized words joined by a hyphen; value drops the hyphen and
  # downcases the second word.
  rule cap_latin_word_pair
    a:cap_latin_word "-" b:cap_latin_word {
      def value
        a.value + b.value.downcase
      end
    }
  end

  # A capitalized latin word, optionally followed by "?", plus a list of
  # accepted two-letter names.
  rule cap_latin_word
    a:([A-Z]/cap_digraph) b:latin_word "?" {
      def value
        # a is either a plain terminal (no value method) or a cap_digraph node.
        (a.value rescue a.text_value) + b.value
      end
    }
    /
    a:([A-Z]/cap_digraph) b:latin_word {
      def value
        (a.value rescue a.text_value) + b.value
      end
    }
    /
    a:("AE"/"OE") b:latin_word {
      def value
        a.text_value[0..0] + 'e' + b.value
      end
    }
    /
    ("Ca"/"Ea"/"Ge"/"Ia"/"Io"/"Ix"/"Lo"/"Oa"/"Ra"/"Ty"/"Ua"/"Aa"/"Ja"/"Zu"/"La"/"Qu"/"As"/"Ba") {
      def value
        text_value
      end
    }
  end

  # A single capital letter followed by a period.
  rule capped_dotted_char
    [A-Z] "." {
      def value
        text_value
      end
    }
  end

  # A species epithet preceded by a hybrid marker; value always uses "×".
  rule species_word_hybrid
    a:multiplication_sign space b:species_word {
      def value
        a.value + " " + b.value
      end

      def canonical
        b.value
      end

      def hybrid
        true
      end

      def pos
        {b.interval.begin => ['species', b.interval.end]}
      end

      def details
        {:species => {:string => b.value}}
      end
    }
    /
    a:"X" space b:species_word {
      def value
        "× " + b.value
      end

      def canonical
        b.value
      end

      def hybrid
        true
      end

      def pos
        {b.interval.begin => ['species', b.interval.end]}
      end

      def details
        {:species => {:string => b.value}}
      end
    }
    /
    a:"x" space_hard b:species_word {
      def value
        "× " + b.value
      end

      def canonical
        b.value
      end

      def hybrid
        true
      end

      def pos
        {b.interval.begin => ['species', b.interval.end]}
      end

      def details
        {:species => {:string => b.value}}
      end
    }
  end

  # Qualifier before a species epithet; must be followed by whitespace.
  rule species_prefix
    ("aff."/"corrig."/"?") &space_hard
  end

  # A latin word, optionally preceded by digits. Known numbers are spelled
  # out as a prefix; unknown ones are kept followed by a hyphen.
  rule species_word
    a:[0-9]+ "-"? b:latin_word {
      def value
        num = {"1" => "uni", "2" => "du", "3" => "tri", "4" => "quadri", "5" => "quinque", "6" => "hexa", "7" => "septem", "8" => "octo", "9" => "novem", "10" => "decem", "11" => "undecim", "12" => "duodec", "13" => "tredec", "14" => "quattuordec", "15" => "quinquadec", "16" => "hexadec", "17" => "septendec", "18" => "octodec", "19" => "novemdec", "20" => "viginti", "21" => "unviginti", "22" => "duodeviginti", "23" => "triviginti", "24" => "quattuorviginti", "25" => "quinquatviginti", "26" => "hexaviginti", "27" => "septenviginti", "28" => "octoviginti", "29" => "novemviginti", "30" => "triginta", "38" => "trigintaocto", "100" => "centi"}
        a_value = num[a.text_value] ? num[a.text_value] : a.text_value + "-"
        a_value + b.value
      end
    }
    /
    latin_word
  end

  # Lower-case word of at least two letters, optionally hyphenated.
  rule latin_word
    a:valid_name_letters "-" b:latin_word {
      def value
        a.value + "-" + b.value
      end
    }
    /
    a:valid_name_letter b:valid_name_letters {
      def value
        a.value + b.value
      end
    }
  end

  # One or more lower-case letters; ligatures are expanded in value.
  rule valid_name_letters
    [a-zëæœ]+ {
      def value
        res = ''
        text_value.split('').each do |l|
          l = 'ae' if l == 'æ'
          l = 'oe' if l == 'œ'
          # not sure if we should normalize ë as well. It is legal in botanical code, but it
          # might be beneficial to normalize it for the reconciliation purposes
          # l = 'e' if l == 'ë'
          res << l
        end
        res
      end
    }
  end

  # A single lower-case letter; ligatures are expanded in value.
  rule valid_name_letter
    [a-zëæœ] {
      def value
        res = text_value
        res = 'ae' if res == 'æ'
        res = 'oe' if res == 'œ'
        res
      end
    }
  end

  # Capital ligatures, expanded to two letters.
  rule cap_digraph
    "Æ" {
      def value
        'Ae'
      end
    }
    /
    "Œ" {
      def value
        'Oe'
      end
    }
  end

  # A year, optionally wrapped in parentheses and/or followed by a letter.
  rule year
    b:left_paren space a:(year_number_with_character/year_number) space c:right_paren {
      def value
        a.value
      end

      def pos
        a.pos
      end

      def details
        a.details
      end
    }
    /
    year_number_with_character
    /
    year_number
  end

  # A year followed by one letter; the letter is dropped from value.
  rule year_number_with_character
    a:year_number [a-zA-Z] {
      def value
        a.text_value
      end

      def pos
        {interval.begin => ['year', interval.end]}
      end

      def details
        {:year => value}
      end
    }
  end

  # Three or four digits starting with 1 or 2, with an optional "?".
  rule year_number
    [12] [7890] [0-9] [0-9]? [\?]? {
      def value
        text_value
      end

      def pos
        {interval.begin => ['year', interval.end]}
      end

      def details
        {:year => value}
      end
    }
  end

  rule left_paren
    "("
  end

  rule right_paren
    ")"
  end

  # Any accepted hybrid marker; value is always "×".
  rule hybrid_character
    ("x"/"X") {
      def value
        "×"
      end
    }
    /
    multiplication_sign
  end

  rule multiplication_sign
    ("×"/"*") {
      def value
        "×"
      end
    }
  end

  # Optional whitespace.
  rule space
    [\s]*
  end

  # Mandatory whitespace.
  rule space_hard
    [\s]+
  end

end