# encoding: UTF-8
grammar ScientificNameClean

  # Entry point of the grammar: one scientific name, optionally padded with
  # whitespace. Every accessor delegates to the wrapped scientific_name_5 node.
  rule root
    space a:scientific_name_5 space {
      # Name string with each run of two or more whitespace characters
      # collapsed to a single space and both ends trimmed.
      def value
        collapsed = a.value.gsub(/\s{2,}/, ' ')
        collapsed.strip
      end

      # Canonical form, whitespace-normalised the same way as +value+.
      def canonical
        collapsed = a.canonical.gsub(/\s{2,}/, ' ')
        collapsed.strip
      end

      # Position map, taken unchanged from the wrapped node.
      def pos
        a.pos
      end

      # Hybrid flag, taken unchanged from the wrapped node.
      def hybrid
        a.hybrid
      end

      # Always an Array: a details value that is not an Array is wrapped in one.
      def details
        found = a.details
        found.class == Array ? found : [found]
      end
    }
  end
  
  # A name followed by a taxon-concept marker and an authorship;
  # otherwise falls through to scientific_name_4.
  rule scientific_name_5
    a:scientific_name_1 space b:taxon_concept_rank space c:authorship {
      # Name string followed by the marker applied to the authorship.
      def value
        "#{a.value} #{b.apply(c)}"
      end

      # The taxon-concept part does not contribute to the canonical form.
      def canonical
        a.canonical
      end

      # Positions of the name merged with those of the authorship.
      def pos
        name_positions = a.pos
        name_positions.merge(c.pos)
      end

      def hybrid
        a.hybrid
      end

      # Name details merged with the marker's details for the authorship.
      def details
        name_details = a.details
        name_details.merge(b.details(c))
      end
    }
    /
    scientific_name_4
  end
  
  # Hybrid formulas: "name x name" or "name x ?" (the question mark itself is
  # optional in the input); otherwise falls through to scientific_name_3.
  rule scientific_name_4
    a:scientific_name_1 space hybrid_character space b:scientific_name_1 {
      # Both parent names joined by the multiplication sign.
      def value
        "#{a.value} × #{b.value}"
      end

      # Canonical forms of both parents separated by a single space.
      def canonical
        "#{a.canonical} #{b.canonical}"
      end

      def pos
        first_parent = a.pos
        first_parent.merge(b.pos)
      end

      def hybrid
        true
      end

      # Two-element Array: details of each parent, in input order.
      def details
        [a.details, b.details]
      end
    }
    /
    a:scientific_name_1 space hybrid_character space [\?]? {
      # The second parent is always rendered as "?", whether or not it was
      # present in the input.
      def value
        "#{a.value} × ?"
      end

      def canonical
        a.canonical
      end

      def pos
        a.pos
      end

      def hybrid
        true
      end

      # Details of the known parent followed by the "?" placeholder string.
      def details
        [a.details, "?"]
      end
    }
    /
    scientific_name_3
  end
  
  # A name introduced by a hybrid character; otherwise falls through to
  # scientific_name_2.
  rule scientific_name_3
    a:hybrid_character space b:scientific_name_2 {
      # Hybrid sign followed by the name string.
      def value
        "#{a.value} #{b.value}"
      end

      # Canonical form, positions and details all come from the name alone.
      def canonical
        b.canonical
      end

      def pos
        b.pos
      end

      def hybrid
        true
      end

      def details
        b.details
      end
    }
    /
    scientific_name_2
  end
  
  # A name followed by a status part; otherwise falls through to
  # scientific_name_1.
  rule scientific_name_2
    a:scientific_name_1 space b:status_part {
      # Name string followed by the status text.
      def value
        "#{a.value} #{b.value}"
      end

      # The status does not contribute to the canonical form or positions.
      def canonical
        a.canonical
      end

      def pos
        a.pos
      end

      # Falls back to false when asking the name node for its hybrid flag raises.
      def hybrid
        a.hybrid rescue false
      end

      # Name details merged with the status details.
      def details
        name_details = a.details
        name_details.merge(b.details)
      end
    }
    /
    scientific_name_1
  end

  # Base name: a multinomial name is tried first, then a uninomial name.
  rule scientific_name_1
    multinomial_name
    /
    uninomial_name  
  end
  
  
  # One or more status words in a row (right-recursive).
  rule status_part
    a:status_word space b:status_part {
      # Status words joined by single spaces.
      def value
        "#{a.value} #{b.value}"
      end

      # The whole joined status text under the :status key.
      def details
        {:status => value}
      end
    }
    /
    status_word
  end
  
  # A single status word: a latin word immediately followed by a period.
  rule status_word
    latin_word [\.] {
      # Matched text, period included, with surrounding whitespace removed.
      def value
        text_value.strip
      end

      def details
        {:status => value}
      end
    }
    # Disabled alternative (a bare latin word without the period):
    #/
    #latin_word
  end
  
  
  # Names made of a genus and a species, with an optional subgenus, an
  # optional species prefix and optional infraspecific parts. Alternatives
  # are ordered from the most specific to the least specific.
  #
  # In every alternative the subgenus is part of +value+, +pos+ and +details+
  # but is left out of +canonical+.
  rule multinomial_name
    a:genus space b:subgenus space species_prefix? space c:species space_hard d:infraspecies_mult {
      # Genus, subgenus, species and infraspecific parts joined by spaces.
      def value
        [a.value, b.value, c.value, d.value].join(" ")
      end

      # Fix: the subgenus used to be included here, unlike the
      # genus + subgenus + species alternative below, which omits it.
      def canonical
        [a.canonical, c.canonical, d.canonical].join(" ")
      end

      def pos
        a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
      end

      # Falls back to false when asking the species node for its flag raises.
      def hybrid
        c.hybrid rescue false
      end

      def details
        a.details.merge(b.details).merge(c.details).merge(d.details)
      end
    }
    /  
    a:genus space b:subgenus space species_prefix? space c:species {
      # Genus, subgenus and species joined by spaces.
      def value
        [a.value, b.value, c.value].join(" ")
      end

      # The subgenus is not part of the canonical form.
      def canonical
        "#{a.canonical} #{c.canonical}"
      end

      def pos
        a.pos.merge(b.pos).merge(c.pos)
      end

      def hybrid
        c.hybrid rescue false
      end

      def details
        a.details.merge(b.details).merge(c.details)
      end
    }
    /
    a:genus space species_prefix? space b:species space_hard c:infraspecies_mult {
      # Genus, species and infraspecific parts joined by spaces.
      def value
        [a.value, b.value, c.value].join(" ")
      end

      def canonical
        [a.canonical, b.canonical, c.canonical].join(" ")
      end

      def pos
        a.pos.merge(b.pos).merge(c.pos)
      end

      def hybrid
        b.hybrid rescue false
      end

      def details
        a.details.merge(b.details).merge(c.details)
      end
    }
    /
    a:genus space species_prefix? space b:species {
      # Genus and species joined by a space.
      def value
        "#{a.value} #{b.value}"
      end

      def canonical
        "#{a.canonical} #{b.canonical}"
      end

      def pos
        a.pos.merge(b.pos)
      end

      def hybrid
        b.hybrid rescue false
      end

      def details
        a.details.merge(b.details)
      end
    }
  end
  
  # One or more infraspecific parts in a row (right-recursive). The
  # :infraspecies entry of +details+ is always an Array.
  rule infraspecies_mult
    a:infraspecies space b:infraspecies_mult {
      def value
        "#{a.value} #{b.value}"
      end

      def canonical
        "#{a.canonical} #{b.canonical}"
      end

      def pos
        head_positions = a.pos
        head_positions.merge(b.pos)
      end

      # Concatenates the :infraspecies entries of both sides, wrapping any
      # entry that is not already an Array.
      def details
        head = a.details[:infraspecies]
        head = [head] unless head.class == Array
        tail = b.details[:infraspecies]
        tail = [tail] unless tail.class == Array
        a.details.merge({:infraspecies => head + tail})
      end
    }
    /
    infraspecies {
      # Wraps the single :infraspecies entry of the underlying node in an Array.
      def details
        {:infraspecies => [super[:infraspecies]]}
      end
    }
  end
  
  # An infraspecific epithet, optionally followed by its authorship.
  rule infraspecies
    a:infraspecies_epitheton space b:authorship {
      # Epithet string followed by the authorship string.
      def value
        "#{a.value} #{b.value}"
      end

      # The authorship does not contribute to the canonical form.
      def canonical
        a.canonical
      end

      def pos
        epithet_positions = a.pos
        epithet_positions.merge(b.pos)
      end

      # Authorship details are merged into the epithet's :infraspecies hash.
      def details
        epithet = a.details[:infraspecies]
        {:infraspecies => epithet.merge(b.details)}
      end
    }
    /
    infraspecies_epitheton 
  end
  
  # An infraspecific epithet: either a rank marker followed by a species
  # word, or a bare species word that is not followed by a period.
  rule infraspecies_epitheton
    sel:rank space_hard a:species_word {
      # String form, canonical form and details are all produced by the rank
      # node, which receives the epithet word.
      def value
        sel.apply(a)
      end

      def canonical
        sel.canonical(a)
      end

      # Only the epithet word is recorded, keyed by its start offset.
      def pos
        word_span = a.interval
        {word_span.begin => ['infraspecies', word_span.end]}
      end

      def details
        sel.details(a)
      end
    }
    /
    species_word ![\.] {
      def value
        text_value
      end

      def canonical
        value
      end

      def pos
        {interval.begin => ['infraspecies', interval.end]}
      end

      # No rank marker was given, so the rank is reported as 'n/a'.
      def details
        epithet = {:epitheton => value, :rank => 'n/a'}
        {:infraspecies => epithet}
      end
    }
  end
  
  # Taxon-concept marker: "sec." or "sensu.". Both are reported as "sec.".
  rule taxon_concept_rank
    ("sec."/"sensu.") {
      def value
        "sec."
      end

      # Marker followed by the value of +a+, with a leading space.
      def apply(a)
        " #{value} #{a.value}"
      end

      # Details of +a+ under the :taxon_concept key.
      # NOTE(review): despite the nil default, +a+ must respond to details.
      def details(a = nil)
        {:taxon_concept => a.details}
      end    
    }
  end
  
  # Infraspecific rank markers. The literal list is tried first, then the
  # "forma" variants handled by rank_forma.
  rule rank
    ("morph."/"f.sp."/"B"/"ssp."/"mut."/"nat"/"nothosubsp."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"subsp."/"subf."/"race"/"α" 
    /"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
    {
      # The marker as matched, with surrounding whitespace removed.
      def value
        text_value.strip
      end

      # Marker followed by the value of +a+, with a leading space.
      def apply(a)
        " #{text_value} #{a.value}"
      end

      # Canonical form drops the marker: a space followed by the value of +a+.
      def canonical(a)
        " #{a.value}"
      end

      # The epithet is nil when asking +a+ for its value raises
      # (for example when +a+ is nil).
      def details(a = nil)
        epithet = (a.value rescue nil)
        {:infraspecies => {:epitheton => epithet, :rank => text_value}}
      end
    }
    /
    rank_forma
  end
  
  # The "forma" rank in any of its spellings; always reported as "f.".
  rule rank_forma
    ("forma"/"form."/"fo."/"f.")
    {
      def value
        "f."
      end

      # Normalised marker followed by the value of +a+, with a leading space.
      def apply(a)
        " #{value} #{a.value}"
      end

      # Canonical form drops the marker: a space followed by the value of +a+.
      def canonical(a)
        " #{a.value}"
      end

      # The epithet is nil when asking +a+ for its value raises
      # (for example when +a+ is nil).
      def details(a = nil)
        epithet = (a.value rescue nil)
        {:infraspecies => {:epitheton => epithet, :rank => value}}
      end
    }
  end
  
  # A species epithet, optionally followed by its authorship.
  rule species
    a:species_epitheton space b:authorship {
      # Epithet string followed by the authorship string.
      def value
        "#{a.value} #{b.value}"
      end

      # The authorship does not contribute to the canonical form.
      def canonical
        a.canonical
      end

      # Falls back to false when asking the epithet node for its flag raises.
      def hybrid
        a.hybrid rescue false
      end

      def pos
        epithet_positions = a.pos
        epithet_positions.merge(b.pos)
      end

      # Authorship details are merged into the epithet's :species hash.
      def details
        epithet = a.details[:species]
        {:species => epithet.merge(b.details)}
      end
    }
    /
    species_epitheton
  end
  
  # A species epithet. Tried in order: a species word that is followed by an
  # author prefix word (checked by lookahead, not consumed), a plain species
  # word, and finally a hybrid species word.
  rule species_epitheton
    a:species_word &(space_hard author_prefix_word space_hard) {
      def value 
        a.value
      end

      def canonical
        a.value
      end

      # Falls back to false when asking the word node for its flag raises.
      def hybrid
        a.hybrid rescue false
      end

      # The epithet word keyed by its start offset.
      def pos
        word_span = a.interval
        {word_span.begin => ['species', word_span.end]}
      end

      def details
        epithet = {:epitheton => a.value}
        {:species => epithet}
      end
    }
    /
    species_word {
      # +value+ is provided by the underlying species_word node.
      def canonical
        value
      end

      def pos
        {interval.begin => ['species', interval.end]}
      end

      def hybrid
        false
      end

      def details
        epithet = {:epitheton => value}
        {:species => epithet}
      end
    }
    /
    species_word_hybrid
  end
  
  # A subgenus: a capitalised latin word enclosed in parentheses.
  rule subgenus
    left_paren space a:cap_latin_word space right_paren {
      # The word wrapped in parentheses, without inner whitespace.
      def value
        "(#{a.value})"
      end

      # The bare word, without parentheses.
      def canonical
        a.value
      end

      # Only the word itself is recorded, keyed by its start offset.
      def pos
        word_span = a.interval
        {word_span.begin => ['subgenus', word_span.end]}
      end

      def details
        epithet = {:epitheton => a.value}
        {:subgenus => epithet}
      end
    }
  end
  
  # A genus: a capitalised latin word that is NOT followed by an author prefix
  # word and an author word (negative lookahead, nothing is consumed).
  rule genus
    a:cap_latin_word !(space_hard author_prefix_word space_hard author_word) {
      def value
        a.value
      end

      # The word keyed by its start offset.
      def pos
        word_span = a.interval
        {word_span.begin => ['genus', word_span.end]}
      end

      def canonical
        a.value
      end

      def details
        epithet = {:epitheton => a.value}
        {:genus => epithet}
      end
    }
  end
  
  # A single-word name, optionally followed by its authorship.
  rule uninomial_name
    a:uninomial_epitheton space_hard b:authorship {
      # Name word followed by the authorship string.
      def value
        "#{a.value} #{b.value}"
      end

      # The authorship does not contribute to the canonical form.
      def canonical
        a.canonical
      end

      def pos
        word_positions = a.pos
        word_positions.merge(b.pos)
      end

      def hybrid
        false
      end

      # Authorship details are merged into the word's :uninomial hash.
      def details
        epithet = a.details[:uninomial]
        {:uninomial => epithet.merge(b.details)}
      end
    }
    /
    uninomial_epitheton
  end

  # The word of a single-word name: a capitalised latin word.
  # +value+ is provided by the underlying cap_latin_word node.
  rule uninomial_epitheton
    cap_latin_word {
      def canonical
        value
      end

      # The word keyed by its start offset.
      def pos
        {interval.begin => ['uninomial', interval.end]}
      end

      def hybrid
        false
      end

      def details 
        epithet = {:epitheton => value}
        {:uninomial => epithet}
      end
    }
  end
  
  # Full authorship. Tried in order:
  #   1. parenthesised basionym authorship + simple authorship + ex authorship
  #   2. parenthesised basionym authorship + simple authorship
  #   3. parenthesised basionym authorship alone
  #   4. simple authorship + ex authorship
  #   5. simple authorship alone
  rule authorship
    a:basionym_authorship_with_parenthesis space b:simple_authorship ","? space c:ex_authorship {
      def value
        [a.value, b.value, c.value].join(" ")
      end

      def pos
        a.pos.merge(b.pos).merge(c.pos)
      end

      # The simple authorship becomes the combination author team, and the
      # ex-authorship details are merged into that team.
      def details
        result = {
          :authorship => text_value.strip,
          :combinationAuthorTeam => b.details[:basionymAuthorTeam],
          :basionymAuthorTeam => a.details[:basionymAuthorTeam]
        }
        result[:combinationAuthorTeam].merge!(c.details)
        result
      end
    }
    /
    a:basionym_authorship_with_parenthesis space b:simple_authorship {
      def value
        "#{a.value} #{b.value}"
      end

      def pos
        basionym_positions = a.pos
        basionym_positions.merge(b.pos)
      end

      # The simple authorship becomes the combination author team.
      def details
        {
          :authorship => text_value.strip,
          :combinationAuthorTeam => b.details[:basionymAuthorTeam],
          :basionymAuthorTeam => a.details[:basionymAuthorTeam]
        }
      end
    }
    /
    basionym_authorship_with_parenthesis
    /
    a:simple_authorship ","? space b:ex_authorship {
      def value
        "#{a.value} #{b.value}"
      end

      def pos
        author_positions = a.pos
        author_positions.merge(b.pos)
      end

      # Starts from the simple authorship details, replaces :authorship with
      # the whole matched text and merges the ex-authorship into the team.
      def details
        result = a.details
        result[:authorship] = text_value.strip
        result[:basionymAuthorTeam].merge!(b.details)
        result
      end
    }
    /
    simple_authorship
  end
   
  
  # Basionym authorship written in parentheses. Tried in order:
  #   1. "(authors)" followed by a year outside the parentheses
  #   2. "(simple authorship ex authorship)"
  #   3. "(simple authorship)"
  #   4. "(?)"
  rule basionym_authorship_with_parenthesis
    left_paren space a:authors_names space right_paren space [,]? space b:year {
      # The year is rendered inside the parentheses although it is matched
      # after the closing one.
      def value
        "(#{a.value} #{b.value})"
      end

      def pos
        author_positions = a.pos
        author_positions.merge(b.pos)
      end 

      # NOTE(review): the team text is stored under :author_team here, while
      # the other alternatives of this rule use :authorTeam. Confirm which
      # key consumers expect before unifying.
      def details
        team = {:author_team => text_value}.merge(a.details).merge(b.details)
        {:authorship => text_value, :basionymAuthorTeam => team}
      end
    }
    /
    left_paren space a:simple_authorship ","? space b:ex_authorship space right_paren {
      def value
        "(#{a.value} #{b.value})"
      end

      def pos
        author_positions = a.pos
        author_positions.merge(b.pos)
      end

      # Starts from the simple authorship details, merges the ex-authorship
      # into the team and replaces :authorship with the whole matched text.
      def details
        result = a.details
        result[:basionymAuthorTeam].merge!(b.details)
        result[:authorship] = text_value.strip
        result
      end
    }
    /
    left_paren space a:simple_authorship space right_paren {
      def value
        "(#{a.value})"
      end

      def pos
        a.pos
      end

      # Simple authorship details with :authorship replaced by the whole
      # matched text, parentheses included.
      def details
        result = a.details
        result[:authorship] = text_value
        result
      end
    }
    /
    left_paren space a:"?" space right_paren {
      def value
        "(?)"
      end

      # The question mark is recorded as an unknown author.
      def pos
        mark_span = a.interval
        {mark_span.begin => ['unknown_author', mark_span.end]}
      end

      def details
        team = {:authorTeam => text_value, :author => ['?']}
        {:authorship => text_value, :basionymAuthorTeam => team}
      end
    }
  end
  
  # An ex-authorship: the "ex"/"in" separator followed by a simple authorship.
  rule ex_authorship
    ex_sep space b:simple_authorship {
      # Always rendered with " ex ", whichever separator was matched.
      def value
        " ex #{b.value}"
      end

      def pos
        b.pos
      end

      # The author team of the simple authorship, re-keyed as :exAuthorTeam.
      # Entries of that team take precedence over the :authorTeam text set here.
      def details
        team = {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])
        {:exAuthorTeam => team}
      end
    } 
  end
  

  # Authorship without parentheses. Tried in order:
  #   1. authors, optional year, then "non" followed by other authors and a
  #      year (the "non ..." part is matched but not reported)
  #   2. authors followed by a year
  #   3. authors only
  rule simple_authorship
    a:authors_names space [,]? space b:year? [,]? space "non" space authors_names space [,]? space year {
      # Authors followed by the year when one was given, authors alone otherwise.
      def value
        year_given? ? "#{a.value} #{b.value}" : a.value
      end

      def pos
        year_given? ? a.pos.merge(b.pos) : a.pos
      end

      def details
        details_with_arg(:basionymAuthorTeam)
      end

      # Builds the details hash with the author team stored under the given
      # key (String or Symbol). Year details are merged in only when a year
      # was matched.
      def details_with_arg(authorTeamType = 'basionymAuthorTeam')
        team = {:authorTeam => a.text_value.strip}.merge(a.details)
        team = team.merge(b.details) if year_given?
        {:authorship => text_value, authorTeamType.to_sym => team}
      end

      # The year is optional in this alternative. When it is absent, b is an
      # empty node without the year accessors, so calling value, pos or
      # details on it would raise NoMethodError.
      def year_given?
        b.respond_to?(:details)
      end
    }
    /
    a:authors_names space [,]? space b:year {
      # Authors followed by the year.
      def value
        "#{a.value} #{b.value}"
      end

      def pos
        a.pos.merge(b.pos)
      end

      def details
        details_with_arg(:basionymAuthorTeam)
      end

      # Builds the details hash with the author team stored under the given
      # key (String or Symbol).
      def details_with_arg(authorTeamType = 'basionymAuthorTeam')
        team = {:authorTeam => a.text_value.strip}.merge(a.details).merge(b.details)
        {:authorship => text_value, authorTeamType.to_sym => team}
      end
    }
    /
    authors_names {
      # Details of the underlying authors node merged into the author team.
      def details
        result = details_with_arg(:basionymAuthorTeam)
        result[:basionymAuthorTeam].merge!(super)
        result
      end

      # Builds the details hash with the matched text as the author team,
      # stored under the given key (String or Symbol).
      def details_with_arg(authorTeamType = 'basionymAuthorTeam')
        team = {:authorTeam => text_value}
        {:authorship => text_value, authorTeamType.to_sym => team}
      end
    }
  end
  
  # One or more author names joined by separators (right-recursive), a single
  # author name, or an unknown-author marker.
  rule authors_names
    a:author_name space sep:author_separator space b:authors_names {
      # String form, positions and details are all produced by the separator
      # node, which receives both sides.
      def value
        sep.apply(a, b)
      end

      def pos
        sep.pos(a, b)
      end

      def details
        sep.details(a, b)
      end
    }
    /
    author_name
    /
    unknown_auth
  end
  
  
  # Markers that stand in for an unknown author.
  rule unknown_auth
    ("auct."/"hort."/"anon."/"ht.") {
      # The marker exactly as matched.
      def value
        text_value
      end

      # The marker keyed by its start offset.
      def pos
        {interval.begin => ['unknown_author', interval.end]}
      end

      # Whatever the marker, the author is reported as "unknown".
      def details
        authors = ["unknown"]
        {:author => authors}
      end
    }
  end
  
  # Ex-authorship separator: "ex" or "in", which must be followed by a
  # whitespace character (checked by lookahead, not consumed).
  rule ex_sep
    ("ex"/"in") &[\s]
  end
  
  # Separator between author names: "&", ",", "and" or "et".
  rule author_separator
    ("&"/","/"and"/"et") {
      # Joins the values of both sides. "&", "and" and "et" are all rendered
      # as " et"; a comma is kept as is, directly after the left side.
      def apply(a,b)
        token = text_value.strip
        joiner = ["&","and","et"].include?(token) ? " et" : token
        "#{a.value}#{joiner} #{b.value}"
      end

      def pos(a,b)
        left_positions = a.pos
        left_positions.merge(b.pos)
      end

      # Concatenates the :author lists of both sides.
      def details(a,b)
        authors = a.details[:author] + b.details[:author]
        {:author => authors}
      end
    }
  end
  
  # One author name made of one or more words (right-recursive). A leading
  # author prefix word is tried first, then an ordinary author word followed
  # by more words, then a single author word.
  rule author_name
    space a:author_prefix_word space b:author_name space {
      # Prefix word followed by the rest of the name.
      def value
        "#{a.value} #{b.value}"
      end

      def pos
        word_positions = a.pos
        word_positions.merge(b.pos)
      end

      # The whole multi-word name counts as one author.
      def details
        {:author => [value]}
      end
    }
    /
    space a:author_word space b:author_name space {
      # First word followed by the rest of the name.
      def value
        "#{a.value} #{b.value}"
      end

      def pos
        word_positions = a.pos
        word_positions.merge(b.pos)
      end

      # The whole multi-word name counts as one author.
      def details
        {:author => [value]}
      end
    }
    /
    author_word
  end
  
  # A single word of an author name. Tried in order:
  #   1. the literal "A S. Xu"
  #   2. the fixed abbreviations "arg.", "et al.{?}" and "et al."
  #   3. a word starting with a listed capital letter other than "X"
  #   4. a word starting with "X" that has at least one more character
  #   5. an author prefix word
  # Every +pos+ maps a start offset to ['author_word', end offset].
  rule author_word
    "A S. Xu" {
      def value
        text_value.strip
      end

      # Fix: the three words ("A", "S.", "Xu") used to be recorded with their
      # lengths (1, 2, 2) instead of absolute end offsets, unlike every other
      # pos in this grammar.
      def pos
        start = interval.begin
        {
          start => ['author_word', start + 1],
          (start + 2) => ['author_word', start + 4],
          (start + 5) => ['author_word', start + 7]
        }
      end

      def details
        {:author => [value]}
      end
    }
    /
    ("arg."/"et al.\{\?\}"/"et al.") {
      def value
        text_value.strip
      end

      def pos
        #cheating because there are several words in some of them
        {interval.begin => ['author_word', interval.end]}
      end

      def details
        {:author => [value]}
      end
    }
    / 
    ("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-W]/[Y-Z]) [^0-9\[\]\(\)\s&,]* {
      def value
        text_value
      end

      def pos
        {interval.begin => ['author_word', interval.end]}
      end

      def details
        {:author => [value]}
      end
    }
    /
    "X" [^0-9\[\]\(\)\s&,]+ {
      def value
        text_value
      end

      def pos
        {interval.begin => ['author_word', interval.end]}
      end

      def details
        {:author => [value]}
      end
    }
    /
    author_prefix_word
  end
  
  # Lower-case particles that may start an author name. The word must be
  # followed by whitespace (checked by lookahead, not consumed).
  rule author_prefix_word
    space ("ab"/"bis"/"da"/"der"/"den"/"della"/"dela"/"de"/"di"/"du"/"la"/"ter"/"van"/"von") &space_hard {
      # Matched text, including any whitespace consumed by the leading space.
      def value
        text_value
      end

      def pos
        #cheating because there are several words in some of them
        {interval.begin => ['author_word', interval.end]}
      end

      # Added: this node is the last alternative of author_word, so it can
      # stand in for a whole author name. Callers then ask it for details,
      # which raised NoMethodError before. Same shape as the other
      # author_word alternatives.
      def details
        {:author => [value]}
      end
    }
  end
  
  # A capitalised latin word. Tried in order: a capital (or capital digraph)
  # plus a latin word followed by "?", the same without the "?", and a fixed
  # list of two-letter words.
  rule cap_latin_word
    a:([A-Z]/cap_digraph) b:latin_word "?" {
      # The trailing "?" is dropped. A plain capital letter has no value
      # method, so its matched text is used instead.
      def value
        initial = (a.value rescue a.text_value)
        initial + b.value
      end
    }
    /
    a:([A-Z]/cap_digraph) b:latin_word {
      # A plain capital letter has no value method, so its matched text is
      # used instead.
      def value
        initial = (a.value rescue a.text_value)
        initial + b.value
      end
    }
    /
    ("Ca"/"Ea"/"Ge"/"Ia"/"Io"/"Io"/"Ix"/"Lo"/"Oa"/"Ra"/"Ty"/"Ua"/"Aa"/"Ja"/"Zu"/"La"/"Qu"/"As"/"Ba") {
      # The two-letter word exactly as matched.
      def value
        text_value
      end
    }
  end

  # A species word marked as hybrid by a leading multiplication sign, "X" or
  # "x". A lower-case "x" must be separated from the word by whitespace.
  # In every alternative only the species word is recorded in pos and details.
  rule species_word_hybrid
    a:multiplication_sign space b:species_word {
      # Sign followed by the species word.
      def value
        "#{a.value} #{b.value}"
      end

      def canonical
        b.value
      end

      def hybrid
        true
      end

      def pos
        word_span = b.interval
        {word_span.begin => ['species', word_span.end]}
      end

      def details
        epithet = {:epitheton => b.value}
        {:species => epithet}
      end
    }
    /
    a:"X" space b:species_word {
      # The "X" is rendered as the multiplication sign.
      def value
        "× #{b.value}"
      end

      def canonical
        b.value
      end

      def hybrid
        true
      end

      def pos
        word_span = b.interval
        {word_span.begin => ['species', word_span.end]}
      end

      def details
        epithet = {:epitheton => b.value}
        {:species => epithet}
      end
    }
    /
    a:"x" space_hard b:species_word {
      # The "x" is rendered as the multiplication sign.
      def value
        "× #{b.value}"
      end

      def canonical
        b.value
      end

      def hybrid
        true
      end

      def pos
        word_span = b.interval
        {word_span.begin => ['species', word_span.end]}
      end

      def details
        epithet = {:epitheton => b.value}
        {:species => epithet}
      end
    }
  end

  # Qualifier that may precede a species epithet: "aff.", "corrig." or "?".
  # It must be followed by whitespace (checked by lookahead, not consumed).
  rule species_prefix
    ("aff."/"corrig."/"?") &space_hard
  end
  
  # A species word: digits, an optional hyphen and a latin word, or a plain
  # latin word.
  rule species_word
    a:[0-9]+ "-"? b:latin_word {
      # Digits and word are always joined by a hyphen, whether or not the
      # input had one.
      def value
        "#{a.text_value}-#{b.value}"
      end
    }
    /
    latin_word
  end

  # A lower-case word: one listed lower-case letter or a digraph, followed by
  # further name letters.
  rule latin_word
    a:[a-zëüäöïéåóç] b:full_name_letters {
      # First letter as matched, followed by the rest of the word.
      def value
        first_letter = a.text_value
        first_letter + b.value
      end
     }
     /
     a:digraph b:full_name_letters {
      # The leading digraph is replaced by its two-letter value.
      def value
        expanded = a.value
        expanded + b.value
      end
     }
  end

  # The tail of a word: name letters with digraphs anywhere in between
  # (right-recursive). Each digraph contributes its two-letter value.
  rule full_name_letters
    a:digraph b:full_name_letters {
      def value
        "#{a.value}#{b.value}"
      end
    }
    /
    a:valid_name_letters b:digraph c:full_name_letters {
      def value
        [a.value, b.value, c.value].join
      end
    }
    /
    valid_name_letters
  end

  # One or more lower-case letters (including the listed accented ones) or
  # hyphens.
  rule valid_name_letters
    [a-z\-ëüäöïéåóç]+ {
      # The letters exactly as matched.
      def value
        text_value
      end
    }
  end 

  # Capital ligatures; each is reported as a two-letter string.
  rule cap_digraph
    "Æ" {
      def value
        "Ae"
      end
    }
    /
    "Œ" {
      def value
        "Oe"
      end
    }
  end

  # Lower-case ligatures; each is reported as a two-letter string.
  rule digraph
    "æ" {
      def value
        "ae"
      end
    }
    /
    "œ" {
      def value
        "oe"
      end
    }
  end

  # A year: in parentheses, with a trailing letter, or plain.
  rule year
    b:left_paren space a:(year_number_with_character/year_number) space c:right_paren {
      # The parentheses are dropped. String form, positions and details all
      # come from the inner year node.
      def value
        inner = a
        inner.value
      end

      def pos
        inner = a
        inner.pos
      end

      def details
        inner = a
        inner.details
      end 
    }
    /
    year_number_with_character
    /
    year_number
  end

  # A year number immediately followed by one ASCII letter.
  rule year_number_with_character
    a:year_number [a-zA-Z] {
      # The year without the trailing letter.
      def value
        digits = a
        digits.text_value
      end

      # The recorded span covers the whole match, trailing letter included.
      def pos
        whole = interval
        {whole.begin => ['year', whole.end]}
      end

      def details
        {:year => value}
      end
    }
  end
  
  # A year number: "1" or "2", then one of 7, 8, 9 or 0, then one or two more
  # digits, optionally followed by a question mark.
  rule year_number
    [12] [7890] [0-9] [0-9]? [\?]? {
      # The year exactly as matched, question mark included.
      def value
        text_value
      end

      # The year keyed by its start offset.
      def pos
        whole = interval
        {whole.begin => ['year', whole.end]}
      end

      def details
        {:year => value}
      end
    }
  end
  
  # Opening parenthesis.
  rule left_paren
    "("
  end
  
  # Closing parenthesis.
  rule right_paren
    ")"
  end
  
  # Hybrid marker: the letter "x" in either case, or the multiplication sign.
  rule hybrid_character
    ("x"/"X") {
      # The letter is always reported as the multiplication sign.
      def value
        sign = "×"
        sign
      end
    }
    /
    multiplication_sign
  end
  
  # The multiplication sign used to mark hybrids.
  rule multiplication_sign
    "×" {
      # The sign exactly as matched.
      def value
        text_value
      end
    }
  end
  
  # Optional whitespace: zero or more whitespace characters.
  rule space
    [\s]*
  end

  # Mandatory whitespace: one or more whitespace characters.
  rule space_hard
    [\s]+
  end
  
end