lib/biodiversity/parser/scientific_name_clean.treetop in dimus-biodiversity-0.0.18 vs lib/biodiversity/parser/scientific_name_clean.treetop in dimus-biodiversity-0.5.1

- old
+ new

@@ -1,141 +1,141 @@ # encoding: UTF-8 grammar ScientificNameClean - rule composite_scientific_name - a:scientific_name space hybrid_separator space b:scientific_name space { + rule root + space a:scientific_name_5 space { def value - a.value + " × " + b.value + a.value.gsub(/\s{2,}/, ' ').strip end - def canonical - a.canonical + " × " + b.canonical - end - def pos - a.pos.merge(b.pos) - end - - def details - {:hybrid => {:scientific_name1 => a.details, :scientific_name2 => b.details}} - end - } - / - a:scientific_name space hybrid_separator space [\?]? { - def value - a.value + " × ?" - end - def canonical - a.canonical + a.canonical.gsub(/\s{2,}/, ' ').strip end def pos a.pos end def details - {:hybrid => {:scientific_name1 => a.details, :scientific_name2 => "?"}} + a.details end } - / - scientific_name end - rule scientific_name - name_part_authors_mix - / - space a:name_part space b:authors_part space c:taxon_concept_rank space d:authors_part space { + rule scientific_name_5 + a:scientific_name_1 space b:taxon_concept_rank space c:authorship { def value - a.value + " " + b.value + " " + c.apply(d) + a.value + " " + b.apply(c) end def canonical a.canonical end def pos - a.pos.merge(b.pos).merge(d.pos) + a.pos.merge(c.pos) end def details - a.details.merge(b.details).merge(c.details(d)).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value + " " + d.text_value).gsub(/\s{2,}/, ' ').strip}) - end + a.details.merge(b.details(c)) + end } - / - space a:name_part space b:taxon_concept_rank space c:authors_part space { + / + scientific_name_4 + end + + rule scientific_name_4 + a:scientific_name_1 space hybrid_character space b:scientific_name_1 { def value - a.value + " " + b.apply(c) + a.value + " × " + b.value end - + def canonical - a.canonical + a.canonical + " " + b.canonical end def pos - a.pos.merge(c.pos) + a.pos.merge(b.pos) end - + def details - a.details.merge(b.details(c)).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value).gsub(/\s{2,}/, ' ').strip}) - end + {:hybridFormula => [a.details, b.details]} + end } - / - space a:name_part space b:authors_part space c:status_part space { + / + a:scientific_name_1 space hybrid_character space [\?]? { def value - a.value + " " + b.value + " " + c.value + a.value + " × ?" end + def canonical a.canonical end def pos - a.pos.merge(b.pos) + a.pos end def details - a.details.merge(b.details).merge(c.details).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value).gsub(/\s{2,}/, ' ').strip}) + {:hybridFormula => [a.details, "?"]} end } / - space a:name_part space b:authors_part space { - def value + scientific_name_3 + end + + rule scientific_name_3 + a:hybrid_character space b:scientific_name_2 { + def value a.value + " " + b.value end + def canonical - a.canonical + b.canonical end def pos - a.pos.merge(b.pos) + b.pos end def details - a.details.merge(b.details).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => b.text_value.gsub(/\s{2,}/, ' ')}) + {:namedHybrid => b.details} end } / - space a:name_part space b:year space { + scientific_name_2 + end + + rule scientific_name_2 + a:scientific_name_1 space b:status_part { def value a.value + " " + b.value end def canonical a.canonical end def pos - a.pos.merge(b.pos) + a.pos end def details - a.details.merge(b.details).merge({:is_valid => false}).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => b.text_value.gsub(/\s{2,}/, ' ')}) + a.details.merge(b.details) end } / - name_part + scientific_name_1 end + + rule scientific_name_1 + multinomial_name + / + uninomial_name + end + rule status_part a:status_word space b:status_part { def value a.value + " " + b.value end @@ -154,740 +154,791 @@ end def details {:status => value} end } - / - latin_word + #/ + #latin_word end - rule name_part_authors_mix - a:species_name space b:authors_part space c:subspecies_name space d:authors_part { + + rule multinomial_name + a:genus space b:subgenus space c:species space_hard d:infraspecies_mult { def value - (a.value + " " + b.value + " " + c.value + " " + d.value).gsub(/\s+/,' ') + a.value + " " + b.value + " " + c.value + " " + d.value end + def canonical - (a.canonical + " " + c.canonical).gsub(/\s+/,' ') + a.canonical + " " + c.canonical + " " + d.canonical end - + def pos a.pos.merge(b.pos).merge(c.pos).merge(d.pos) end - + def details - a.details.merge(c.details).merge({:species_authors=>b.details, :subspecies_authors => d.details}).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value + " " + d.text_value).gsub(/\s{2,}/, ' ')}) + a.details.merge(b.details).merge(c.details).merge(d.details) end } - / - a:species_name space b:authors_part space c:subspecies_name { - def value - (a.value + " " + b.value + " " + c.value).gsub(/\s+/,' ') + / + a:genus space b:subgenus space c:species { + def value + a.value + " " + b.value + " " + c.value end + def canonical - (a.canonical + " " + c.canonical).gsub(/\s+/,' ') + a.canonical + " " + c.canonical end - def details - a.details.merge(c.details).merge({:species_authors=>b.details}).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value).gsub(/\s{2,}/, ' ')}) - end - } - end - - rule authors_part - a:original_authors_revised_name space b:authors_revised_name { - def value - a.value + " " + b.value - end def pos - a.pos.merge(b.pos) + a.pos.merge(b.pos).merge(c.pos) end def details - a.details.merge(b.details) + a.details.merge(b.details).merge(c.details) end } / - a:simple_authors_part space ex_sep space b:simple_authors_part { - def value - a.value + " ex " + b.value + a:genus space_hard b:species space_hard c:infraspecies_mult { + def value + a.value + " " + b.value + " " + c.value end - + + def canonical + a.canonical + " " + b.canonical + " " + c.canonical + end + def pos - a.pos.merge(b.pos) + a.pos.merge(b.pos).merge(c.pos) end - + def details - {:revised_name_authors => {:revised_authors => a.details[:authors], :authors => b.details[:authors]}} + a.details.merge(b.details).merge(c.details) end } / - a:original_authors_revised_name space b:authors_names_full { + a:genus space_hard b:species { def value - a.value + " " + b.value + a.value + " " + b.value end + + def canonical + a.canonical + " " + b.canonical + end def pos a.pos.merge(b.pos) end def details a.details.merge(b.details) end } - / - authors_revised_name - / - original_authors_revised_name - / - simple_authors_part end - rule simple_authors_part - a:original_authors_names_full space b:authors_names_full { + rule infraspecies_mult + a:infraspecies space b:infraspecies_mult { def value a.value + " " + b.value end + def canonical + a.canonical + " " + b.canonical + end + def pos a.pos.merge(b.pos) end def details - a.details.merge(b.details) + #{:infraspecies => a.details[:infraspceies] << b.details[:infraspecies]} + a_array = a.details[:infraspecies].class == Array ? a.details[:infraspecies] : [a.details[:infraspecies]] + b_array = b.details[:infraspecies].class == Array ? b.details[:infraspecies] : [b.details[:infraspecies]] + a.details.merge({:infraspecies => a_array + b_array}) end } / - original_authors_names_full - / - authors_names_full + infraspecies end - rule original_authors_names_full - left_bracket space a:authors_names space right_bracket space [,]? space b:year { + rule infraspecies + a:infraspecies_epitheton space b:authorship { def value - "(" + a.value + " " + b.value + ")" + a.value + " " + b.value end - + + def canonical + a.canonical + end + def pos a.pos.merge(b.pos) end - + def details - {:orig_authors => a.details[:authors], :year => b.details[:year]} + {:infraspecies => a.details[:infraspecies].merge(b.details)} end } / - left_bracket space a:authors_names_full space right_bracket { - def value - "(" + a.value + ")" + infraspecies_epitheton + end + + rule infraspecies_epitheton + sel:rank space_hard a:species_word { + def value + sel.apply(a) end + def canonical + sel.canonical(a) + end def pos - a.pos + {a.interval.begin => ['infraspecies', a.interval.end]} end - + def details - {:orig_authors => a.details[:authors]} + sel.details(a) end } / - "[" space a:authors_names_full space "]" { - def value - "(" + a.value + ")" - end - - def pos - a.pos - end - - def details - {:orig_authors => a.details[:authors]} - end - } - / - left_bracket space a:unknown_auth space right_bracket { + species_word ![\.] { def value - "(" + a.value + ")" + text_value end + def canonical + value + end + def pos - a.pos + {interval.begin => ['infraspecies', interval.end]} end - + def details - {:orig_authors => a.details[:authors]} + {:infraspecies => {:epitheton => value, :rank => 'n/a'}} end } - / - left_bracket space "?" space right_bracket { + end + + rule taxon_concept_rank + "sec." { def value - "(?)" + "sec." end - def details - {:orig_authors => "unknown"} + def apply(a) + " " + value + " " + a.value end + def details(a = nil) + {:taxon_concept => a.details} + end } end - - rule original_authors_revised_name - left_bracket space a:authors_revised_name space right_bracket { + + rule rank + ("morph."/"f.sp."/"B"/"ssp."/"mut."/"nat"/"nothosubsp."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"subsp."/"subf."/"race"/"α" + /"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*") + { def value - "(" + a.value + ")" + text_value.strip end - - def pos - a.pos + + def apply(a) + " " + text_value + " " + a.value end + + def canonical(a) + " " + a.value + end - def details - {:original_revised_name_authors => a.details[:revised_name_authors]} + def details(a = nil) + {:infraspecies => {:epitheton => (a.value rescue nil), :rank => text_value}} end } + / + rank_forma end - rule authors_revised_name - a:authors_names_full space ex_sep space b:authors_names_full { + rule rank_forma + ("forma"/"form."/"fo."/"f.") + { def value - a.value + " ex " + b.value + "f." end + def apply(a) + " " + value + " " + a.value + end + def canonical(a) + " " + a.value + end + def details(a = nil) + {:infraspecies => {:epitheton => (a.value rescue nil), :rank => value}} + end + } + end + + rule species + a:species_epitheton space b:authorship { + def value + a.value + " " + b.value + end + def canonical + a.canonical + end + def pos a.pos.merge(b.pos) end def details - {:revised_name_authors =>{:revised_authors => a.details[:authors], :authors => b.details[:authors]}} + {:species => a.details[:species].merge(b.details)} end - } + } + / + species_epitheton end - - rule authors_names_full - a:authors_names space [,]? space b:year { + + rule species_epitheton + a:species_word &(space_hard author_prefix_word space_hard) { def value - a.value + " " + b.value + a.value end + def canonical + a.value + end + def pos - a.pos.merge(b.pos) + {a.interval.begin => ['species', a.interval.end]} end - + def details - {:authors => {:names => a.details[:authors][:names]}.merge(b.details)} + {:species => {:epitheton => a.value}} end } / - authors_names - / - unknown_auth - end - - rule unknown_auth - ("auct."/"hort."/"anon."/"ht.") { - def value - text_value + species_word { + def canonical + value end def pos - {interval.begin => ['unknown_author', interval.end]} + {interval.begin => ['species', interval.end]} end def details - {:authors => "unknown"} + {:species => {:epitheton => value}} end } + / + species_word_hybrid end - rule ex_sep - ("ex"/"in") - end - - rule authors_names - a:author_name space sep:author_name_separator space b:authors_names { + rule subgenus + left_paren space a:cap_latin_word space right_paren { def value - sep.apply(a,b) + "(" + a.value + ")" end + def canonical + a.value + end + def pos - sep.pos(a,b) + {a.interval.begin => ['subgenus', a.interval.end]} end def details - sep.details(a,b) + {:subgenus => {:epitheton => a.value}} end } - / - author_name - end + end - rule author_name_separator - ("&"/","/"and"/"et") { - def apply(a,b) - sep = text_value.strip - sep = " et" if ["&","and","et"].include? sep - a.value + sep + " " + b.value + rule genus + cap_latin_word { + def pos + {interval.begin => ['genus', interval.end]} end - def pos(a,b) - a.pos.merge(b.pos) + def canonical + value end - - def details(a,b) - {:authors => {:names => a.details[:authors][:names] + b.details[:authors][:names]}} + + def details + {:genus => {:epitheton => value}} end } end - rule author_name - space a:author_word space b:author_name space { + rule uninomial_name + a:uninomial_epitheton space_hard b:authorship { def value a.value + " " + b.value end + def canonical + a.canonical + end + def pos a.pos.merge(b.pos) end def details - {:authors => {:names => [value]}} + {:uninomial => a.details[:uninomial].merge(b.details)} end } / - author_word + uninomial_epitheton end + + rule uninomial_epitheton + cap_latin_word { + def canonical + value + end + + def pos + {interval.begin => ['uninomial', interval.end]} + end + + def details + {:uninomial => {:epitheton => value}} + end + } + end - rule author_word - "A S. Xu" { + rule authorship + a:basionym_authorship_with_parenthesis space b:simple_authorship space c:ex_authorship { def value - text_value.strip + a.value + " " + b.value + " " + c.value end def pos - {interval.begin => ['author_word', 1], (interval.begin + 2) => ['author_word', 2], (interval.begin + 5) => ['author_word', 2]} + a.pos.merge(b.pos).merge(c.pos) end def details - {:authors => {:names => [value]}} + val = {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]} + val[:combinationAuthorTeam].merge!(c.details) + val end } / - ("anon."/"f."/"bis"/"arg."/author_prefix/"et al.\{\?\}"/"et al.") { + a:basionym_authorship_with_parenthesis space b:simple_authorship { def value - text_value.strip + a.value + " " + b.value end def pos - #cheating because there are several words in some of them - {interval.begin => ['author_word', interval.end]} + a.pos.merge(b.pos) end def details - {:authors => {:names => [value]}} + {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]} end } - / - ("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-Z]) [^0-9()\s&,]+ { + / + basionym_authorship_with_parenthesis + / + a:simple_authorship space b:ex_authorship { def value - text_value.gsub(/\s+/, " ").strip + a.value + " " + b.value end def pos - {interval.begin => ['author_word', interval.end]} + a.pos.merge(b.pos) end def details - {:authors => {:names => [value]}} + val = a.details + val[:authorship] = text_value.strip + val[:basionymAuthorTeam].merge!(b.details) + val end } + / + simple_authorship end - - rule author_prefix - "da"/"der"/"den"/"de"/"du"/"la"/"ter"/"van"/"von" - end + - rule name_part - space a:species_name space b:rank space_hard c:editorials_full { + rule basionym_authorship_with_parenthesis + left_paren space a:authors_names space right_paren space [,]? space b:year { def value - a.value + " " + b.value + " " + c.value + "(" + a.value + " " + b.value + ")" end - def canonical - a.canonical - end def pos - a.pos - end + a.pos.merge(b.pos) + end def details - a.details.merge(b.details).merge(c.details) + { :authorship => text_value, + :basionymAuthorTeam => {:author_team => text_value}.merge(a.details).merge(b.details) + } end } / - space a:species_name &(space author_prefix) { + left_paren space a:simple_authorship space b:ex_authorship space right_paren { def value - a.value + "(" + a.value + " " + b.value + ")" end - def canonical - a.canonical - end - def pos - a.pos + a.pos.merge(b.pos) end - + def details - a.details + val = a.details + val[:basionymAuthorTeam].merge!(b.details) + val[:authorship] = text_value.strip + val end } / - space a:species_name space b:subspecies_names { + left_paren space a:simple_authorship space right_paren { def value - a.value + b.value + "(" + a.value + ")" end - def canonical - a.canonical + b.canonical - end def pos - a.pos.merge(b.pos) + a.pos end def details - a.details.merge(b.details) + val = a.details + val[:authorship] = text_value + val end } / - space a:species_name space b:species_word ![\.] { + left_paren space a:"?" space right_paren { def value - a.value + " " + b.value + "(?)" end - def canonical - a.canonical + " " + b.value - end - def pos - a.pos.merge({b.interval.begin => ['subspecies', b.interval.end]}) + {a.interval.begin => ['unknown_author', a.interval.end]} end def details - a.details.merge({:subspecies => {:rank => "n/a", :value =>b.value}}) + {:authorship => text_value, :basionymAuthorTeam => {:authorTeam => text_value, :author => ['?']}} end } - / - species_name - / - cap_latin_word end - rule subspecies_names - a:subspecies_name space b:subspecies_names { + rule ex_authorship + ex_sep space b:simple_authorship { def value - a.value + b.value + " ex " + b.value end - def canonical - a.canonical + b.canonical - end - def pos - a.pos.merge(b.pos) + b.pos end def details - c = a.details[:subspecies] + b.details_subspecies - a.details.merge({:subspecies => c, :is_valid => false}) + val = {:exAuthorTeam => {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])} + val end - } - / - subspecies_name + } end - rule subspecies_name - sel:rank space_hard a:species_word { - def value - sel.apply(a) + + rule simple_authorship + a:authors_names space [,]? space b:year { + def value + a.value + " " + b.value end - def canonical - sel.canonical(a) - end def pos - {a.interval.begin => ['subspecies', a.interval.end]} + a.pos.merge(b.pos) end + def details - sel.details(a) + details_with_arg(:basionymAuthorTeam) end - def details_subspecies - details[:subspecies] + + def details_with_arg(authorTeamType = 'basionymAuthorTeam') + { :authorship => text_value, + authorTeamType.to_sym => { + :authorTeam => a.text_value.strip + }.merge(a.details).merge(b.details) + } end } - end - - rule editorials_full - "(" space a:editorials space ")" { - def value - "(" + a.value + ")" - end + / + authors_names { def details - {:editorial_markup => value, :is_valid => false} + details = details_with_arg(:basionymAuthorTeam) + details[:basionymAuthorTeam].merge!(super) + details end + + def details_with_arg(authorTeamType = 'basionymAuthorTeam') + { :authorship => text_value, + authorTeamType.to_sym => { + :authorTeam => text_value, + } + } + end } end - rule editorials - space a:rank space [&]? space b:editorials { + rule authors_names + a:author_name space sep:author_separator space b:authors_names { def value - a.value + b.value + sep.apply(a,b) end + + def pos + sep.pos(a,b) + end + def details - {:editorial_markup => value, :is_valid => false} - end + sep.details(a,b) + end } / - rank + author_name + / + unknown_auth end - rule rank - ("morph."/"f.sp."/"B"/"ssp."/"nat"/"mut."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"subsp."/"subf."/"race"/"α" - /"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*") - { + + rule unknown_auth + ("auct."/"hort."/"anon."/"ht.") { def value - text_value.strip + text_value end - def apply(a) - " " + text_value + " " + a.value + + def pos + {interval.begin => ['unknown_author', interval.end]} end - def canonical(a) - " " + a.value + + def details + {:author => ["unknown"]} end - def details(a = nil) - {:subspecies => [{:rank => text_value, :value => (a.value rescue nil)}]} - end } - / - rank_forma end - rule rank_forma - ("forma"/"form."/"fo."/"f.") - { - def value - "f." + rule ex_sep + ("ex"/"in") &[\s] + end + + rule author_separator + ("&"/","/"and"/"et") { + def apply(a,b) + sep = text_value.strip + sep = " et" if ["&","and","et"].include? sep + a.value + sep + " " + b.value end - def apply(a) - " " + value + " " + a.value + + def pos(a,b) + a.pos.merge(b.pos) end - def canonical(a) - " " + a.value + + def details(a,b) + {:author => a.details[:author] + b.details[:author]} end - def details(a = nil) - {:subspecies => [{:rank => value, :value => (a.value rescue nil)}]} - end } end - rule species_name - hybrid_separator space_hard a:cap_latin_word space_hard b:species_word { + rule author_name + space a:author_prefix_word space b:author_name space { def value - "× " + a.value + " " + b.value - end - def canonical a.value + " " + b.value end def pos - {a.interval.begin => ['genus', a.interval.end], b.interval.begin => ['species', b.interval.end]} + a.pos.merge(b.pos) end def details - {:genus => a.value, :species => b.value, :cross => 'before'} + {:author => [value]} end } / - hybrid_separator space_hard a:cap_latin_word { + space a:author_word space b:author_name space { def value - "× " + a.value + a.value + " " + b.value end - def canonical - a.value - end def pos - {a.interval.begin => ['uninomial', a.interval.end]} + a.pos.merge(b.pos) end def details - {:uninomial => a.value, :cross => 'before'} + {:author => [value]} end } / - a:cap_latin_word space_hard hybrid_separator space_hard b:species_word { + author_word + end + + rule author_word + "A S. Xu" { def value - a.value + " × " + b.value + text_value.strip end - def canonical - a.value + " " + b.value - end def pos - {a.interval.begin => ['genus', a.interval.end], b.interval.begin => ['species', b.interval.end]} + {interval.begin => ['author_word', 1], (interval.begin + 2) => ['author_word', 2], (interval.begin + 5) => ['author_word', 2]} end def details - {:genus => a.value, :species => b.value, :cross => 'inside'} + {:author => [value]} end } / - a:cap_latin_word space b:subgenus space c:species_word { + ("bis"/"arg."/"et al.\{\?\}"/"et al.") { def value - a.value + " " + b.value + " " + c.value + text_value.strip end - def canonical - a.value + " " + c.value - end def pos - {a.interval.begin => ['genus', a.interval.end]}.merge(b.pos).merge({c.interval.begin => ['subspecies', c.interval.end]}) + #cheating because there are several words in some of them + {interval.begin => ['author_word', interval.end]} end def details - {:genus => a.value, :subgenus => b.details, :species => c.value} + {:author => [value]} end - } - / - a:cap_latin_word space_hard b:species_word { + } + / + ("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-W]/[Y-Z]) [^0-9\[\]\(\)\s&,]* { def value - a.value + " " + b.value + text_value end - def canonical - value - end def pos - {a.interval.begin => ['genus', a.interval.end], b.interval.begin => ['species', b.interval.end]} + {interval.begin => ['author_word', interval.end]} end def details - {:genus => a.value, :species => b.value} + {:author => [value]} end } - end - - rule subgenus - "(" space a:cap_latin_word space ")" { + / + "X" [^0-9\[\]\(\)\s&,]+ { def value - "(" + a.value + ")" + text_value end - def canonical - '' - end - def pos - {a.interval.begin => ['subgenus', a.interval.end]} + {interval.begin => ['author_word', interval.end]} end def details - a.value + {:author => [value]} end } + / + author_prefix_word end - rule taxon_concept_rank - "sec." { + rule author_prefix_word + space ("da"/"der"/"den"/"de"/"du"/"la"/"ter"/"van"/"von") &space_hard { def value - "sec." + text_value end - def apply(a) - " " + value + " " + a.value + + def pos + #cheating because there are several words in some of them + {interval.begin => ['author_word', interval.end]} end - def details(a = nil) - {:taxon_concept => a.details} - end } end - - # "subsect."/"subtrib."/"subgen."/"trib."/ - rule genus_rank - ("subsect."/"subtrib."/"subgen."/"trib.") - { + + rule cap_latin_word + a:([A-Z]/cap_digraph) b:latin_word "?" { def value - text_value.strip + (a.value rescue a.text_value) + b.value end - def apply(a) - " " + text_value + " " + a.value + } + / + a:([A-Z]/cap_digraph) b:latin_word { + def value + (a.value rescue a.text_value) + b.value end - def canonical(a) - "" + } + / + ("Ca"/"Ea"/"Ge"/"Ia"/"Io"/"Io"/"Ix"/"Lo"/"Oa"/"Ra"/"Ty"/"Ua"/"Aa"/"Ja"/"Zu"/"La"/"Qu"/"As"/"Ba") { + def value + text_value end - def details(a = nil) - {:subgenus => [{:rank => text_value, :value => (a.value rescue nil)}]} - end } end - - rule cap_latin_word - a:([A-Z]/cap_digraph) b:latin_word "?" { + + rule species_word_hybrid + a:multiplication_sign space b:species_word { def value - (a.value rescue a.text_value) + b.value + a.value + " " + b.value end - def canonical - value + def canonical + b.value end def pos - {a.interval.begin => ['uninomial', a.interval.end]} + {b.interval.begin => ['species', b.interval.end]} end - def details - {:uninomial => value} + def details + {:species => {:epitheton => b.value, :namedHybrid => true}} end } / - a:([A-Z]/cap_digraph) b:latin_word { + a:"X" space b:species_word { def value - (a.value rescue a.text_value) + b.value + "× " + b.value end - def canonical - value + def canonical + b.value end def pos - {a.interval.begin => ['uninomial',b.interval.end]} + {b.interval.begin => ['species', b.interval.end]} end - def details - {:uninomial => value} + def details + {:species => {:epitheton => b.value, :namedHybrid => true}} end } / - ("Ca"/"Ea"/"Ge"/"Ia"/"Io"/"Io"/"Ix"/"Lo"/"Oa"/"Ra"/"Ty"/"Ua"/"Aa"/"Ja"/"Zu"/"La"/"Qu"/"As"/"Ba") { + a:"x" space_hard b:species_word { def value - text_value + "× " + b.value end def canonical - value + b.value end def pos - {interval.begin => ['uninomial', interval.end]} + {b.interval.begin => ['species', b.interval.end]} end def details - {:uninomial => value} + {:species => {:epitheton => b.value, :namedHybrid => true}} end } end + #rule species_word + ## (!"sensu" a:species_word_0) { + # def value + # a.value + # end + # } + #end + rule species_word a:[0-9]+ "-"? b:latin_word { def value - a.text_value + "-"+ b.value + a.text_value + "-" + b.value end } / latin_word end @@ -895,119 +946,111 @@ rule latin_word a:[a-zëüäöïé] b:full_name_letters { def value a.text_value + b.value end - def details - {} - end } / a:digraph b:full_name_letters { def value a.value + b.value end - def details - {} - end } end - + rule full_name_letters a:digraph b:full_name_letters { def value a.value + b.value end - def details - {} - end } / a:valid_name_letters b:digraph c:full_name_letters { def value a.value + b.value + c.value end - def details - {} - end } / valid_name_letters end - + rule valid_name_letters [a-z\-ëüäöïé]+ { def value text_value end - def details - {} - end } end - - rule cap_digraph - "Æ" { - def value - 'Ae' - end - } - / - "Œ" { - def value - 'Oe' - end - } - end - + + rule cap_digraph + "Æ" { + def value + 'Ae' + end + } + / + "Œ" { + def value + 'Oe' + end + } + end + rule digraph - "æ" { - def value - 'ae' - end - } - / - "œ" { - def value - 'oe' - end - } - end - - rule hybrid_separator - ("x"/"X"/"×") { + "æ" { def value - "x" + 'ae' end - def details - {} + } + / + "œ" { + def value + 'oe' end } end - + rule year - year_with_character - / - [0-9\?]+ { + b:left_paren space a:(year_number_with_character/year_number) space c:right_paren { def value - text_value.strip + a.value end def pos - {interval.begin => ['year', interval.end]} + a.pos end def details + a.details + end + } + / + year_number_with_character + / + year_number + end + + rule year_number_with_character + a:year_number [a-zA-Z] { + def value + a.text_value + end + + def pos + {interval.begin => ['year', interval.end]} + end + + def details {:year => value} end } end - rule year_with_character - a:[0-9\?]+ [a-zA-Z] { + rule year_number + [12] [7890] [0-9] [0-9]? [\?]? { def value - a.text_value + text_value end def pos {interval.begin => ['year', interval.end]} end @@ -1015,45 +1058,41 @@ def details {:year => value} end } end - -# Next two rles only for ( (author) ) -# doesn't touch parenthesis inside another one like (bla-bla-bla1 (bla-bla-bla2)) - - rule left_bracket - "( ("/"(" - { - def value - "(" - end - } - end - - rule right_bracket - ") )"/")" - { - def value - ")" - end - } - end + rule left_paren + "(" + end - rule space - [\s]* { - def details - { - } + rule right_paren + ")" + end + + rule hybrid_character + ("x"/"X") { + def value + "×" end } + / + multiplication_sign end - - rule space_hard - [\s]+ { - def details - {} + + rule multiplication_sign + "×" { + def value + text_value end } end -end + + rule space + [\s]* + end + + rule space_hard + [\s]+ + end + +end \ No newline at end of file