lib/biodiversity/parser/scientific_name_clean.treetop in dimus-biodiversity-0.0.18 vs lib/biodiversity/parser/scientific_name_clean.treetop in dimus-biodiversity-0.5.1
- old
+ new
@@ -1,141 +1,141 @@
# encoding: UTF-8
grammar ScientificNameClean
- rule composite_scientific_name
- a:scientific_name space hybrid_separator space b:scientific_name space {
+ rule root
+ space a:scientific_name_5 space {
def value
- a.value + " × " + b.value
+ a.value.gsub(/\s{2,}/, ' ').strip
end
- def canonical
- a.canonical + " × " + b.canonical
- end
- def pos
- a.pos.merge(b.pos)
- end
-
- def details
- {:hybrid => {:scientific_name1 => a.details, :scientific_name2 => b.details}}
- end
- }
- /
- a:scientific_name space hybrid_separator space [\?]? {
- def value
- a.value + " × ?"
- end
-
def canonical
- a.canonical
+ a.canonical.gsub(/\s{2,}/, ' ').strip
end
def pos
a.pos
end
def details
- {:hybrid => {:scientific_name1 => a.details, :scientific_name2 => "?"}}
+ a.details
end
}
- /
- scientific_name
end
- rule scientific_name
- name_part_authors_mix
- /
- space a:name_part space b:authors_part space c:taxon_concept_rank space d:authors_part space {
+ rule scientific_name_5
+ a:scientific_name_1 space b:taxon_concept_rank space c:authorship {
def value
- a.value + " " + b.value + " " + c.apply(d)
+ a.value + " " + b.apply(c)
end
def canonical
a.canonical
end
def pos
- a.pos.merge(b.pos).merge(d.pos)
+ a.pos.merge(c.pos)
end
def details
- a.details.merge(b.details).merge(c.details(d)).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value + " " + d.text_value).gsub(/\s{2,}/, ' ').strip})
- end
+ a.details.merge(b.details(c))
+ end
}
- /
- space a:name_part space b:taxon_concept_rank space c:authors_part space {
+ /
+ scientific_name_4
+ end
+
+ rule scientific_name_4
+ a:scientific_name_1 space hybrid_character space b:scientific_name_1 {
def value
- a.value + " " + b.apply(c)
+ a.value + " × " + b.value
end
-
+
def canonical
- a.canonical
+ a.canonical + " " + b.canonical
end
def pos
- a.pos.merge(c.pos)
+ a.pos.merge(b.pos)
end
-
+
def details
- a.details.merge(b.details(c)).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value).gsub(/\s{2,}/, ' ').strip})
- end
+ {:hybridFormula => [a.details, b.details]}
+ end
}
- /
- space a:name_part space b:authors_part space c:status_part space {
+ /
+ a:scientific_name_1 space hybrid_character space [\?]? {
def value
- a.value + " " + b.value + " " + c.value
+ a.value + " × ?"
end
+
def canonical
a.canonical
end
def pos
- a.pos.merge(b.pos)
+ a.pos
end
def details
- a.details.merge(b.details).merge(c.details).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value).gsub(/\s{2,}/, ' ').strip})
+ {:hybridFormula => [a.details, "?"]}
end
}
/
- space a:name_part space b:authors_part space {
- def value
+ scientific_name_3
+ end
+
+ rule scientific_name_3
+ a:hybrid_character space b:scientific_name_2 {
+ def value
a.value + " " + b.value
end
+
def canonical
- a.canonical
+ b.canonical
end
def pos
- a.pos.merge(b.pos)
+ b.pos
end
def details
- a.details.merge(b.details).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => b.text_value.gsub(/\s{2,}/, ' ')})
+ {:namedHybrid => b.details}
end
}
/
- space a:name_part space b:year space {
+ scientific_name_2
+ end
+
+ rule scientific_name_2
+ a:scientific_name_1 space b:status_part {
def value
a.value + " " + b.value
end
def canonical
a.canonical
end
def pos
- a.pos.merge(b.pos)
+ a.pos
end
def details
- a.details.merge(b.details).merge({:is_valid => false}).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => b.text_value.gsub(/\s{2,}/, ' ')})
+ a.details.merge(b.details)
end
}
/
- name_part
+ scientific_name_1
end
+
+ rule scientific_name_1 # innermost name rule: a multinomial name, falling back to a uninomial name
+ multinomial_name
+ / # ordered choice: the alternative below is tried only when the one above fails
+ uninomial_name
+ end
+
rule status_part
a:status_word space b:status_part {
def value
a.value + " " + b.value
end
@@ -154,740 +154,791 @@
end
def details
{:status => value}
end
}
- /
- latin_word
+ #/
+ #latin_word
end
- rule name_part_authors_mix
- a:species_name space b:authors_part space c:subspecies_name space d:authors_part {
+
+ rule multinomial_name
+ a:genus space b:subgenus space c:species space_hard d:infraspecies_mult {
def value
- (a.value + " " + b.value + " " + c.value + " " + d.value).gsub(/\s+/,' ')
+ a.value + " " + b.value + " " + c.value + " " + d.value
end
+
def canonical
- (a.canonical + " " + c.canonical).gsub(/\s+/,' ')
+ a.canonical + " " + c.canonical + " " + d.canonical
end
-
+
def pos
a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
end
-
+
def details
- a.details.merge(c.details).merge({:species_authors=>b.details, :subspecies_authors => d.details}).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value + " " + d.text_value).gsub(/\s{2,}/, ' ')})
+ a.details.merge(b.details).merge(c.details).merge(d.details)
end
}
- /
- a:species_name space b:authors_part space c:subspecies_name {
- def value
- (a.value + " " + b.value + " " + c.value).gsub(/\s+/,' ')
+ /
+ a:genus space b:subgenus space c:species {
+ def value
+ a.value + " " + b.value + " " + c.value
end
+
def canonical
- (a.canonical + " " + c.canonical).gsub(/\s+/,' ')
+ a.canonical + " " + c.canonical
end
- def details
- a.details.merge(c.details).merge({:species_authors=>b.details}).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value).gsub(/\s{2,}/, ' ')})
- end
- }
- end
-
- rule authors_part
- a:original_authors_revised_name space b:authors_revised_name {
- def value
- a.value + " " + b.value
- end
def pos
- a.pos.merge(b.pos)
+ a.pos.merge(b.pos).merge(c.pos)
end
def details
- a.details.merge(b.details)
+ a.details.merge(b.details).merge(c.details)
end
}
/
- a:simple_authors_part space ex_sep space b:simple_authors_part {
- def value
- a.value + " ex " + b.value
+ a:genus space_hard b:species space_hard c:infraspecies_mult {
+ def value
+ a.value + " " + b.value + " " + c.value
end
-
+
+ def canonical
+ a.canonical + " " + b.canonical + " " + c.canonical
+ end
+
def pos
- a.pos.merge(b.pos)
+ a.pos.merge(b.pos).merge(c.pos)
end
-
+
def details
- {:revised_name_authors => {:revised_authors => a.details[:authors], :authors => b.details[:authors]}}
+ a.details.merge(b.details).merge(c.details)
end
}
/
- a:original_authors_revised_name space b:authors_names_full {
+ a:genus space_hard b:species {
def value
- a.value + " " + b.value
+ a.value + " " + b.value
end
+
+ def canonical
+ a.canonical + " " + b.canonical
+ end
def pos
a.pos.merge(b.pos)
end
def details
a.details.merge(b.details)
end
}
- /
- authors_revised_name
- /
- original_authors_revised_name
- /
- simple_authors_part
end
- rule simple_authors_part
- a:original_authors_names_full space b:authors_names_full {
+ rule infraspecies_mult
+ a:infraspecies space b:infraspecies_mult {
def value
a.value + " " + b.value
end
+ def canonical
+ a.canonical + " " + b.canonical
+ end
+
def pos
a.pos.merge(b.pos)
end
def details
- a.details.merge(b.details)
+ #{:infraspecies => a.details[:infraspceies] << b.details[:infraspecies]}
+ a_array = a.details[:infraspecies].class == Array ? a.details[:infraspecies] : [a.details[:infraspecies]]
+ b_array = b.details[:infraspecies].class == Array ? b.details[:infraspecies] : [b.details[:infraspecies]]
+ a.details.merge({:infraspecies => a_array + b_array})
end
}
/
- original_authors_names_full
- /
- authors_names_full
+ infraspecies
end
- rule original_authors_names_full
- left_bracket space a:authors_names space right_bracket space [,]? space b:year {
+ rule infraspecies
+ a:infraspecies_epitheton space b:authorship {
def value
- "(" + a.value + " " + b.value + ")"
+ a.value + " " + b.value
end
-
+
+ def canonical
+ a.canonical
+ end
+
def pos
a.pos.merge(b.pos)
end
-
+
def details
- {:orig_authors => a.details[:authors], :year => b.details[:year]}
+ {:infraspecies => a.details[:infraspecies].merge(b.details)}
end
}
/
- left_bracket space a:authors_names_full space right_bracket {
- def value
- "(" + a.value + ")"
+ infraspecies_epitheton
+ end
+
+ rule infraspecies_epitheton
+ sel:rank space_hard a:species_word {
+ def value
+ sel.apply(a)
end
+ def canonical
+ sel.canonical(a)
+ end
def pos
- a.pos
+ {a.interval.begin => ['infraspecies', a.interval.end]}
end
-
+
def details
- {:orig_authors => a.details[:authors]}
+ sel.details(a)
end
}
/
- "[" space a:authors_names_full space "]" {
- def value
- "(" + a.value + ")"
- end
-
- def pos
- a.pos
- end
-
- def details
- {:orig_authors => a.details[:authors]}
- end
- }
- /
- left_bracket space a:unknown_auth space right_bracket {
+ species_word ![\.] {
def value
- "(" + a.value + ")"
+ text_value
end
+ def canonical
+ value
+ end
+
def pos
- a.pos
+ {interval.begin => ['infraspecies', interval.end]}
end
-
+
def details
- {:orig_authors => a.details[:authors]}
+ {:infraspecies => {:epitheton => value, :rank => 'n/a'}}
end
}
- /
- left_bracket space "?" space right_bracket {
+ end
+
+ rule taxon_concept_rank
+ "sec." {
def value
- "(?)"
+ "sec."
end
- def details
- {:orig_authors => "unknown"}
+ def apply(a)
+ " " + value + " " + a.value
end
+ def details(a = nil)
+ {:taxon_concept => a.details}
+ end
}
end
-
- rule original_authors_revised_name
- left_bracket space a:authors_revised_name space right_bracket {
+
+ rule rank
+ ("morph."/"f.sp."/"B"/"ssp."/"mut."/"nat"/"nothosubsp."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"subsp."/"subf."/"race"/"α"
+ /"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
+ {
def value
- "(" + a.value + ")"
+ text_value.strip
end
-
- def pos
- a.pos
+
+ def apply(a)
+ " " + text_value + " " + a.value
end
+
+ def canonical(a)
+ " " + a.value
+ end
- def details
- {:original_revised_name_authors => a.details[:revised_name_authors]}
+ def details(a = nil)
+ {:infraspecies => {:epitheton => (a.value rescue nil), :rank => text_value}}
end
}
+ /
+ rank_forma
end
- rule authors_revised_name
- a:authors_names_full space ex_sep space b:authors_names_full {
+ rule rank_forma
+ ("forma"/"form."/"fo."/"f.")
+ {
def value
- a.value + " ex " + b.value
+ "f."
end
+ def apply(a)
+ " " + value + " " + a.value
+ end
+ def canonical(a)
+ " " + a.value
+ end
+ def details(a = nil)
+ {:infraspecies => {:epitheton => (a.value rescue nil), :rank => value}}
+ end
+ }
+ end
+
+ rule species
+ a:species_epitheton space b:authorship {
+ def value
+ a.value + " " + b.value
+ end
+ def canonical
+ a.canonical
+ end
+
def pos
a.pos.merge(b.pos)
end
def details
- {:revised_name_authors =>{:revised_authors => a.details[:authors], :authors => b.details[:authors]}}
+ {:species => a.details[:species].merge(b.details)}
end
- }
+ }
+ /
+ species_epitheton
end
-
- rule authors_names_full
- a:authors_names space [,]? space b:year {
+
+ rule species_epitheton
+ a:species_word &(space_hard author_prefix_word space_hard) {
def value
- a.value + " " + b.value
+ a.value
end
+ def canonical
+ a.value
+ end
+
def pos
- a.pos.merge(b.pos)
+ {a.interval.begin => ['species', a.interval.end]}
end
-
+
def details
- {:authors => {:names => a.details[:authors][:names]}.merge(b.details)}
+ {:species => {:epitheton => a.value}}
end
}
/
- authors_names
- /
- unknown_auth
- end
-
- rule unknown_auth
- ("auct."/"hort."/"anon."/"ht.") {
- def value
- text_value
+ species_word {
+ def canonical
+ value
end
def pos
- {interval.begin => ['unknown_author', interval.end]}
+ {interval.begin => ['species', interval.end]}
end
def details
- {:authors => "unknown"}
+ {:species => {:epitheton => value}}
end
}
+ /
+ species_word_hybrid
end
- rule ex_sep
- ("ex"/"in")
- end
-
- rule authors_names
- a:author_name space sep:author_name_separator space b:authors_names {
+ rule subgenus
+ left_paren space a:cap_latin_word space right_paren {
def value
- sep.apply(a,b)
+ "(" + a.value + ")"
end
+ def canonical
+ a.value
+ end
+
def pos
- sep.pos(a,b)
+ {a.interval.begin => ['subgenus', a.interval.end]}
end
def details
- sep.details(a,b)
+ {:subgenus => {:epitheton => a.value}}
end
}
- /
- author_name
- end
+ end
- rule author_name_separator
- ("&"/","/"and"/"et") {
- def apply(a,b)
- sep = text_value.strip
- sep = " et" if ["&","and","et"].include? sep
- a.value + sep + " " + b.value
+ rule genus
+ cap_latin_word {
+ def pos
+ {interval.begin => ['genus', interval.end]}
end
- def pos(a,b)
- a.pos.merge(b.pos)
+ def canonical
+ value
end
-
- def details(a,b)
- {:authors => {:names => a.details[:authors][:names] + b.details[:authors][:names]}}
+
+ def details
+ {:genus => {:epitheton => value}}
end
}
end
- rule author_name
- space a:author_word space b:author_name space {
+ rule uninomial_name
+ a:uninomial_epitheton space_hard b:authorship {
def value
a.value + " " + b.value
end
+ def canonical
+ a.canonical
+ end
+
def pos
a.pos.merge(b.pos)
end
def details
- {:authors => {:names => [value]}}
+ {:uninomial => a.details[:uninomial].merge(b.details)}
end
}
/
- author_word
+ uninomial_epitheton
end
+
+ rule uninomial_epitheton # a single cap_latin_word reported under the :uninomial key
+ cap_latin_word { # methods below are added to the node matched by cap_latin_word
+ def canonical # canonical form is the same string as value
+ value
+ end
+
+ def pos # hash of start offset => [tag, end offset] for the matched word
+ {interval.begin => ['uninomial', interval.end]}
+ end
+
+ def details # nested hash so that authorship data can be merged in under :uninomial by uninomial_name
+ {:uninomial => {:epitheton => value}}
+ end
+ }
+ end
- rule author_word
- "A S. Xu" {
+ rule authorship
+ a:basionym_authorship_with_parenthesis space b:simple_authorship space c:ex_authorship {
def value
- text_value.strip
+ a.value + " " + b.value + " " + c.value
end
def pos
- {interval.begin => ['author_word', 1], (interval.begin + 2) => ['author_word', 2], (interval.begin + 5) => ['author_word', 2]}
+ a.pos.merge(b.pos).merge(c.pos)
end
def details
- {:authors => {:names => [value]}}
+ val = {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
+ val[:combinationAuthorTeam].merge!(c.details)
+ val
end
}
/
- ("anon."/"f."/"bis"/"arg."/author_prefix/"et al.\{\?\}"/"et al.") {
+ a:basionym_authorship_with_parenthesis space b:simple_authorship {
def value
- text_value.strip
+ a.value + " " + b.value
end
def pos
- #cheating because there are several words in some of them
- {interval.begin => ['author_word', interval.end]}
+ a.pos.merge(b.pos)
end
def details
- {:authors => {:names => [value]}}
+ {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
end
}
- /
- ("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-Z]) [^0-9()\s&,]+ {
+ /
+ basionym_authorship_with_parenthesis
+ /
+ a:simple_authorship space b:ex_authorship {
def value
- text_value.gsub(/\s+/, " ").strip
+ a.value + " " + b.value
end
def pos
- {interval.begin => ['author_word', interval.end]}
+ a.pos.merge(b.pos)
end
def details
- {:authors => {:names => [value]}}
+ val = a.details
+ val[:authorship] = text_value.strip
+ val[:basionymAuthorTeam].merge!(b.details)
+ val
end
}
+ /
+ simple_authorship
end
-
- rule author_prefix
- "da"/"der"/"den"/"de"/"du"/"la"/"ter"/"van"/"von"
- end
+
- rule name_part
- space a:species_name space b:rank space_hard c:editorials_full {
+ rule basionym_authorship_with_parenthesis
+ left_paren space a:authors_names space right_paren space [,]? space b:year {
def value
- a.value + " " + b.value + " " + c.value
+ "(" + a.value + " " + b.value + ")"
end
- def canonical
- a.canonical
- end
def pos
- a.pos
- end
+ a.pos.merge(b.pos)
+ end
def details
- a.details.merge(b.details).merge(c.details)
+ { :authorship => text_value,
+ :basionymAuthorTeam => {:author_team => text_value}.merge(a.details).merge(b.details)
+ }
end
}
/
- space a:species_name &(space author_prefix) {
+ left_paren space a:simple_authorship space b:ex_authorship space right_paren {
def value
- a.value
+ "(" + a.value + " " + b.value + ")"
end
- def canonical
- a.canonical
- end
-
def pos
- a.pos
+ a.pos.merge(b.pos)
end
-
+
def details
- a.details
+ val = a.details
+ val[:basionymAuthorTeam].merge!(b.details)
+ val[:authorship] = text_value.strip
+ val
end
}
/
- space a:species_name space b:subspecies_names {
+ left_paren space a:simple_authorship space right_paren {
def value
- a.value + b.value
+ "(" + a.value + ")"
end
- def canonical
- a.canonical + b.canonical
- end
def pos
- a.pos.merge(b.pos)
+ a.pos
end
def details
- a.details.merge(b.details)
+ val = a.details
+ val[:authorship] = text_value
+ val
end
}
/
- space a:species_name space b:species_word ![\.] {
+ left_paren space a:"?" space right_paren {
def value
- a.value + " " + b.value
+ "(?)"
end
- def canonical
- a.canonical + " " + b.value
- end
-
def pos
- a.pos.merge({b.interval.begin => ['subspecies', b.interval.end]})
+ {a.interval.begin => ['unknown_author', a.interval.end]}
end
def details
- a.details.merge({:subspecies => {:rank => "n/a", :value =>b.value}})
+ {:authorship => text_value, :basionymAuthorTeam => {:authorTeam => text_value, :author => ['?']}}
end
}
- /
- species_name
- /
- cap_latin_word
end
- rule subspecies_names
- a:subspecies_name space b:subspecies_names {
+ rule ex_authorship
+ ex_sep space b:simple_authorship {
def value
- a.value + b.value
+ " ex " + b.value
end
- def canonical
- a.canonical + b.canonical
- end
-
def pos
- a.pos.merge(b.pos)
+ b.pos
end
def details
- c = a.details[:subspecies] + b.details_subspecies
- a.details.merge({:subspecies => c, :is_valid => false})
+ val = {:exAuthorTeam => {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])}
+ val
end
- }
- /
- subspecies_name
+ }
end
- rule subspecies_name
- sel:rank space_hard a:species_word {
- def value
- sel.apply(a)
+
+ rule simple_authorship
+ a:authors_names space [,]? space b:year {
+ def value
+ a.value + " " + b.value
end
- def canonical
- sel.canonical(a)
- end
def pos
- {a.interval.begin => ['subspecies', a.interval.end]}
+ a.pos.merge(b.pos)
end
+
def details
- sel.details(a)
+ details_with_arg(:basionymAuthorTeam)
end
- def details_subspecies
- details[:subspecies]
+
+ def details_with_arg(authorTeamType = 'basionymAuthorTeam')
+ { :authorship => text_value,
+ authorTeamType.to_sym => {
+ :authorTeam => a.text_value.strip
+ }.merge(a.details).merge(b.details)
+ }
end
}
- end
-
- rule editorials_full
- "(" space a:editorials space ")" {
- def value
- "(" + a.value + ")"
- end
+ /
+ authors_names {
def details
- {:editorial_markup => value, :is_valid => false}
+ details = details_with_arg(:basionymAuthorTeam)
+ details[:basionymAuthorTeam].merge!(super)
+ details
end
+
+ def details_with_arg(authorTeamType = 'basionymAuthorTeam')
+ { :authorship => text_value,
+ authorTeamType.to_sym => {
+ :authorTeam => text_value,
+ }
+ }
+ end
}
end
- rule editorials
- space a:rank space [&]? space b:editorials {
+ rule authors_names
+ a:author_name space sep:author_separator space b:authors_names {
def value
- a.value + b.value
+ sep.apply(a,b)
end
+
+ def pos
+ sep.pos(a,b)
+ end
+
def details
- {:editorial_markup => value, :is_valid => false}
- end
+ sep.details(a,b)
+ end
}
/
- rank
+ author_name
+ /
+ unknown_auth
end
- rule rank
- ("morph."/"f.sp."/"B"/"ssp."/"nat"/"mut."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"subsp."/"subf."/"race"/"α"
- /"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
- {
+
+ rule unknown_auth
+ ("auct."/"hort."/"anon."/"ht.") {
def value
- text_value.strip
+ text_value
end
- def apply(a)
- " " + text_value + " " + a.value
+
+ def pos
+ {interval.begin => ['unknown_author', interval.end]}
end
- def canonical(a)
- " " + a.value
+
+ def details
+ {:author => ["unknown"]}
end
- def details(a = nil)
- {:subspecies => [{:rank => text_value, :value => (a.value rescue nil)}]}
- end
}
- /
- rank_forma
end
- rule rank_forma
- ("forma"/"form."/"fo."/"f.")
- {
- def value
- "f."
+ rule ex_sep # separator word between an authorship and its "ex" authorship
+ ("ex"/"in") &[\s] # "ex" or "in", accepted only when a whitespace character follows (lookahead; the whitespace is not consumed)
+ end
+
+ rule author_separator
+ ("&"/","/"and"/"et") {
+ def apply(a,b)
+ sep = text_value.strip
+ sep = " et" if ["&","and","et"].include? sep
+ a.value + sep + " " + b.value
end
- def apply(a)
- " " + value + " " + a.value
+
+ def pos(a,b)
+ a.pos.merge(b.pos)
end
- def canonical(a)
- " " + a.value
+
+ def details(a,b)
+ {:author => a.details[:author] + b.details[:author]}
end
- def details(a = nil)
- {:subspecies => [{:rank => value, :value => (a.value rescue nil)}]}
- end
}
end
- rule species_name
- hybrid_separator space_hard a:cap_latin_word space_hard b:species_word {
+ rule author_name
+ space a:author_prefix_word space b:author_name space {
def value
- "× " + a.value + " " + b.value
- end
- def canonical
a.value + " " + b.value
end
def pos
- {a.interval.begin => ['genus', a.interval.end], b.interval.begin => ['species', b.interval.end]}
+ a.pos.merge(b.pos)
end
def details
- {:genus => a.value, :species => b.value, :cross => 'before'}
+ {:author => [value]}
end
}
/
- hybrid_separator space_hard a:cap_latin_word {
+ space a:author_word space b:author_name space {
def value
- "× " + a.value
+ a.value + " " + b.value
end
- def canonical
- a.value
- end
def pos
- {a.interval.begin => ['uninomial', a.interval.end]}
+ a.pos.merge(b.pos)
end
def details
- {:uninomial => a.value, :cross => 'before'}
+ {:author => [value]}
end
}
/
- a:cap_latin_word space_hard hybrid_separator space_hard b:species_word {
+ author_word
+ end
+
+ rule author_word
+ "A S. Xu" {
def value
- a.value + " × " + b.value
+ text_value.strip
end
- def canonical
- a.value + " " + b.value
- end
def pos
- {a.interval.begin => ['genus', a.interval.end], b.interval.begin => ['species', b.interval.end]}
+ {interval.begin => ['author_word', 1], (interval.begin + 2) => ['author_word', 2], (interval.begin + 5) => ['author_word', 2]}
end
def details
- {:genus => a.value, :species => b.value, :cross => 'inside'}
+ {:author => [value]}
end
}
/
- a:cap_latin_word space b:subgenus space c:species_word {
+ ("bis"/"arg."/"et al.\{\?\}"/"et al.") {
def value
- a.value + " " + b.value + " " + c.value
+ text_value.strip
end
- def canonical
- a.value + " " + c.value
- end
def pos
- {a.interval.begin => ['genus', a.interval.end]}.merge(b.pos).merge({c.interval.begin => ['subspecies', c.interval.end]})
+ #cheating because there are several words in some of them
+ {interval.begin => ['author_word', interval.end]}
end
def details
- {:genus => a.value, :subgenus => b.details, :species => c.value}
+ {:author => [value]}
end
- }
- /
- a:cap_latin_word space_hard b:species_word {
+ }
+ /
+ ("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-W]/[Y-Z]) [^0-9\[\]\(\)\s&,]* {
def value
- a.value + " " + b.value
+ text_value
end
- def canonical
- value
- end
def pos
- {a.interval.begin => ['genus', a.interval.end], b.interval.begin => ['species', b.interval.end]}
+ {interval.begin => ['author_word', interval.end]}
end
def details
- {:genus => a.value, :species => b.value}
+ {:author => [value]}
end
}
- end
-
- rule subgenus
- "(" space a:cap_latin_word space ")" {
+ /
+ "X" [^0-9\[\]\(\)\s&,]+ {
def value
- "(" + a.value + ")"
+ text_value
end
- def canonical
- ''
- end
-
def pos
- {a.interval.begin => ['subgenus', a.interval.end]}
+ {interval.begin => ['author_word', interval.end]}
end
def details
- a.value
+ {:author => [value]}
end
}
+ /
+ author_prefix_word
end
- rule taxon_concept_rank
- "sec." {
+ rule author_prefix_word
+ space ("da"/"der"/"den"/"de"/"du"/"la"/"ter"/"van"/"von") &space_hard {
def value
- "sec."
+ text_value
end
- def apply(a)
- " " + value + " " + a.value
+
+ def pos
+ #cheating because there are several words in some of them
+ {interval.begin => ['author_word', interval.end]}
end
- def details(a = nil)
- {:taxon_concept => a.details}
- end
}
end
-
- # "subsect."/"subtrib."/"subgen."/"trib."/
- rule genus_rank
- ("subsect."/"subtrib."/"subgen."/"trib.")
- {
+
+ rule cap_latin_word
+ a:([A-Z]/cap_digraph) b:latin_word "?" {
def value
- text_value.strip
+ (a.value rescue a.text_value) + b.value
end
- def apply(a)
- " " + text_value + " " + a.value
+ }
+ /
+ a:([A-Z]/cap_digraph) b:latin_word {
+ def value
+ (a.value rescue a.text_value) + b.value
end
- def canonical(a)
- ""
+ }
+ /
+ ("Ca"/"Ea"/"Ge"/"Ia"/"Io"/"Io"/"Ix"/"Lo"/"Oa"/"Ra"/"Ty"/"Ua"/"Aa"/"Ja"/"Zu"/"La"/"Qu"/"As"/"Ba") {
+ def value
+ text_value
end
- def details(a = nil)
- {:subgenus => [{:rank => text_value, :value => (a.value rescue nil)}]}
- end
}
end
-
- rule cap_latin_word
- a:([A-Z]/cap_digraph) b:latin_word "?" {
+
+ rule species_word_hybrid
+ a:multiplication_sign space b:species_word {
def value
- (a.value rescue a.text_value) + b.value
+ a.value + " " + b.value
end
- def canonical
- value
+ def canonical
+ b.value
end
def pos
- {a.interval.begin => ['uninomial', a.interval.end]}
+ {b.interval.begin => ['species', b.interval.end]}
end
- def details
- {:uninomial => value}
+ def details
+ {:species => {:epitheton => b.value, :namedHybrid => true}}
end
}
/
- a:([A-Z]/cap_digraph) b:latin_word {
+ a:"X" space b:species_word {
def value
- (a.value rescue a.text_value) + b.value
+ "× " + b.value
end
- def canonical
- value
+ def canonical
+ b.value
end
def pos
- {a.interval.begin => ['uninomial',b.interval.end]}
+ {b.interval.begin => ['species', b.interval.end]}
end
- def details
- {:uninomial => value}
+ def details
+ {:species => {:epitheton => b.value, :namedHybrid => true}}
end
}
/
- ("Ca"/"Ea"/"Ge"/"Ia"/"Io"/"Io"/"Ix"/"Lo"/"Oa"/"Ra"/"Ty"/"Ua"/"Aa"/"Ja"/"Zu"/"La"/"Qu"/"As"/"Ba") {
+ a:"x" space_hard b:species_word {
def value
- text_value
+ "× " + b.value
end
def canonical
- value
+ b.value
end
def pos
- {interval.begin => ['uninomial', interval.end]}
+ {b.interval.begin => ['species', b.interval.end]}
end
def details
- {:uninomial => value}
+ {:species => {:epitheton => b.value, :namedHybrid => true}}
end
}
end
+ #rule species_word
+ ## (!"sensu" a:species_word_0) {
+ # def value
+ # a.value
+ # end
+ # }
+ #end
+
 rule species_word # an epithet word, optionally prefixed by digits
 a:[0-9]+ "-"? b:latin_word { # one or more digits, an optional hyphen, then a latin_word
 def value
- a.text_value + "-"+ b.value
+ a.text_value + "-" + b.value # the hyphen is always emitted, even when the input had none
 end
 }
 / # otherwise fall back to a plain latin_word
 latin_word
 end
@@ -895,119 +946,111 @@
 rule latin_word # a lowercase word of at least two characters: one leading character plus full_name_letters
 a:[a-zëüäöïé] b:full_name_letters { # leading character is a lowercase ASCII letter or one of the listed accented letters
 def value
 a.text_value + b.value
 end
- def details
- {}
- end
 }
 / # second form: the word starts with a ligature
 a:digraph b:full_name_letters {
 def value
 a.value + b.value # a.value is the digraph's two-letter replacement, not the ligature itself
 end
- def details
- {}
- end
 }
 end
-
+
 rule full_name_letters # the remainder of a word: runs of valid_name_letters with digraphs mixed in (right-recursive)
 a:digraph b:full_name_letters { # remainder begins with a ligature
 def value
 a.value + b.value
 end
- def details
- {}
- end
 }
 / # letters, then a ligature, then more of the remainder
 a:valid_name_letters b:digraph c:full_name_letters {
 def value
 a.value + b.value + c.value
 end
- def details
- {}
- end
 }
 / # base case: plain letters only
 valid_name_letters
 end
-
+
 rule valid_name_letters # one or more characters allowed inside a word
 [a-z\-ëüäöïé]+ { # lowercase ASCII letters, the hyphen, and the listed accented letters
 def value
 text_value # returned exactly as matched
 end
- def details
- {}
- end
 }
 end
-
- rule cap_digraph
- "Æ" {
- def value
- 'Ae'
- end
- }
- /
- "Œ" {
- def value
- 'Oe'
- end
- }
- end
-
+
+ rule cap_digraph # capital ligatures; value gives a two-letter ASCII replacement
+ "Æ" {
+ def value
+ 'Ae' # replacement for the matched "Æ"
+ end
+ }
+ /
+ "Œ" {
+ def value
+ 'Oe' # replacement for the matched "Œ"
+ end
+ }
+ end
+
rule digraph
- "æ" {
- def value
- 'ae'
- end
- }
- /
- "œ" {
- def value
- 'oe'
- end
- }
- end
-
- rule hybrid_separator
- ("x"/"X"/"×") {
+ "æ" {
def value
- "x"
+ 'ae'
end
- def details
- {}
+ }
+ /
+ "œ" {
+ def value
+ 'oe'
end
}
end
-
+
 rule year # a year, optionally wrapped in parentheses
- year_with_character
- /
- [0-9\?]+ {
+ b:left_paren space a:(year_number_with_character/year_number) space c:right_paren { # parenthesised form; value, pos and details all delegate to the inner year node
 def value
- text_value.strip
+ a.value # parentheses and surrounding whitespace are not part of the value
 end
 def pos
- {interval.begin => ['year', interval.end]}
+ a.pos
 end
 def details
+ a.details
+ end
+ }
+ / # bare year with a trailing letter is tried before the plain year
+ year_number_with_character
+ /
+ year_number
+ end
+
+ rule year_number_with_character # a year_number immediately followed by one ASCII letter
+ a:year_number [a-zA-Z] {
+ def value
+ a.text_value # the trailing letter is dropped from the value
+ end
+
+ def pos
+ {interval.begin => ['year', interval.end]} # interval is the whole match, so the end offset includes the letter
+ end
+
+ def details
 {:year => value}
 end
 }
 end
- rule year_with_character
- a:[0-9\?]+ [a-zA-Z] {
+ rule year_number
+ [12] [7890] [0-9] [0-9]? [\?]? {
def value
- a.text_value
+ text_value
end
def pos
{interval.begin => ['year', interval.end]}
end
@@ -1015,45 +1058,41 @@
def details
{:year => value}
end
}
end
-
-# Next two rles only for ( (author) )
-# doesn't touch parenthesis inside another one like (bla-bla-bla1 (bla-bla-bla2))
-
- rule left_bracket
- "( ("/"("
- {
- def value
- "("
- end
- }
- end
-
- rule right_bracket
- ") )"/")"
- {
- def value
- ")"
- end
- }
- end
+ rule left_paren # matches a single literal opening parenthesis; defines no extra methods
+ "("
+ end
- rule space
- [\s]* {
- def details
- {
- }
+ rule right_paren # matches a single literal closing parenthesis; defines no extra methods
+ ")"
+ end
+
+ rule hybrid_character # hybrid marker: the letter x in either case, or the multiplication sign
+ ("x"/"X") {
+ def value
+ "×" # a Latin x/X is reported as the multiplication sign
+ end
+ }
+ / # otherwise the input must already contain the multiplication sign
+ multiplication_sign
+ end
-
- rule space_hard
- [\s]+ {
- def details
- {}
+
+ rule multiplication_sign # matches the literal "×" character
+ "×" {
+ def value
+ text_value # the matched text itself
+ end
 }
 end
-end
+
+ rule space # optional whitespace
+ [\s]* # zero or more whitespace characters, so this can match the empty string
+ end
+
+ rule space_hard # mandatory whitespace
+ [\s]+ # one or more whitespace characters
+ end
+
+end
\ No newline at end of file