# encoding: UTF-8
grammar ScientificNameClean

  # Entry rule: one scientific name, optionally padded with whitespace.
  # Every accessor forwards to the matched name node; string results are
  # whitespace-normalised before being returned.
  rule root
    space a:scientific_name_5 space {
      # Normalised name string: runs of whitespace collapsed, ends trimmed.
      def value
        collapsed = a.value.gsub(/\s{2,}/, ' ')
        collapsed.strip
      end

      # Canonical form, normalised the same way as value.
      def canonical
        collapsed = a.canonical.gsub(/\s{2,}/, ' ')
        collapsed.strip
      end

      # Position map of the matched name.
      def pos
        a.pos
      end

      # Hybrid flag of the matched name.
      def hybrid
        a.hybrid
      end

      # Details are always returned as an Array; a single result is wrapped.
      def details
        found = a.details
        found.class == Array ? found : [found]
      end

      # Constant marker returned by this grammar.
      def parser_run
        1
      end
    }
  end
  
  # Top-level name forms, tried in order:
  #   1. multinomial name, hybrid sign, species part
  #   2. name followed by a taxon concept marker and an authorship
  #   3. anything scientific_name_4 accepts
  rule scientific_name_5
    a:multinomial_name space_hard hybrid_character space_hard b:species {
      def value
        "#{a.value} × #{b.value}"
      end

      def canonical
        "#{a.canonical} × #{b.canonical}"
      end

      def pos
        left = a.pos
        left.merge(b.pos)
      end

      def hybrid
        true
      end

      # Two detail entries; the second one inherits the genus of the first.
      def details
        first = a.details
        second = b.details.merge({:genus => first[:genus]})
        [first, second]
      end
    }
    /
    a:scientific_name_1 space b:taxon_concept_rank space c:authorship {
      def value
        "#{a.value} #{b.apply(c)}"
      end

      def canonical
        a.canonical
      end

      def pos
        left = a.pos
        left.merge(c.pos)
      end

      def hybrid
        a.hybrid
      end

      # Name details extended with the taxon concept built from the authorship.
      def details
        concept = b.details(c)
        a.details.merge(concept)
      end
    }
    /
    scientific_name_4
  end
  
  # Hybrid formulas: two names joined by a hybrid sign, or a single name
  # followed by a hybrid sign and an optional question mark. Falls back to
  # scientific_name_3.
  rule scientific_name_4
    a:scientific_name_1 space hybrid_character space b:scientific_name_1 {
      def value
        "#{a.value} × #{b.value}"
      end

      def canonical
        "#{a.canonical} × #{b.canonical}"
      end

      def pos
        left = a.pos
        left.merge(b.pos)
      end

      def hybrid
        true
      end

      # One detail entry per parent name.
      def details
        [a, b].map { |parent| parent.details }
      end
    }
    /
    a:scientific_name_1 space hybrid_character space [\?]? {
      # The second parent is always rendered as a question mark.
      def value
        "#{a.value} × ?"
      end

      def canonical
        a.canonical
      end

      def pos
        a.pos
      end

      def hybrid
        true
      end

      def details
        [a.details, "?"]
      end
    }
    /
    scientific_name_3
  end
  
  # A name prefixed with a hybrid sign. Canonical form, positions and
  # details come from the name alone; falls back to scientific_name_2.
  rule scientific_name_3
    a:hybrid_character space b:scientific_name_2 {
      def value
        "#{a.value} #{b.value}"
      end

      def canonical
        b.canonical
      end

      def pos
        b.pos
      end

      def hybrid
        true
      end

      def details
        b.details
      end
    }
    /
    scientific_name_2
  end
  
  # A name followed by one or more status words. Falls back to
  # scientific_name_1 when no status part is present.
  rule scientific_name_2
    a:scientific_name_1 space b:status_part {
      def value
        "#{a.value} #{b.value}"
      end

      def canonical
        a.canonical
      end

      def pos
        a.pos
      end

      # Any error raised while asking the name for its flag yields false.
      def hybrid
        a.hybrid rescue false
      end

      # Name details extended with the status entry.
      def details
        status = b.details
        a.details.merge(status)
      end
    }
    /
    scientific_name_1
  end

  # Basic name: a multinomial name is tried first, then a uninomial name.
  rule scientific_name_1
    multinomial_name
    /
    uninomial_name  
  end
  
  
  # One or more status words separated by optional whitespace
  # (right-recursive list).
  rule status_part
    a:status_word space b:status_part {
      def value
        "#{a.value} #{b.value}"
      end

      # The whole joined list is reported under a single :status key.
      def details
        joined = value
        {:status => joined}
      end
    }
    /
    status_word
  end
  
  # A single status word: a latin word immediately followed by a period.
  rule status_word
    latin_word [\.] {
      # Matched text with surrounding whitespace removed.
      def value
        text_value.strip
      end
      # Reports the word under the :status key.
      def details
        {:status => value}
      end
    }
    # Disabled alternative: a latin word without the trailing period.
    #/
    #latin_word
  end
  
  
  # Names made of a genus plus at least a species part. Four shapes are
  # tried, from most to least specific:
  #   1. genus, infragenus, species, infraspecies list
  #   2. genus, infragenus, species
  #   3. genus, species, infraspecies list
  #   4. genus, species
  # An optional species prefix may precede the species in every shape; it
  # is not carried into any of the accessors.
  rule multinomial_name
    a:genus space b:infragenus space species_prefix? space c:species space_hard d:infraspecies_mult {
      def value
        "#{a.value} #{b.value} #{c.value} #{d.value}"
      end

      # The infragenus is left out of the canonical form.
      def canonical
        "#{a.canonical} #{c.canonical} #{d.canonical}"
      end

      def pos
        [b, c, d].inject(a.pos) { |merged, part| merged.merge(part.pos) }
      end

      def hybrid
        c.hybrid rescue false
      end

      def details
        [b, c, d].inject(a.details) { |merged, part| merged.merge(part.details) }
      end
    }
    /
    a:genus space b:infragenus space species_prefix? space c:species {
      def value
        "#{a.value} #{b.value} #{c.value}"
      end

      # The infragenus is left out of the canonical form.
      def canonical
        "#{a.canonical} #{c.canonical}"
      end

      def pos
        [b, c].inject(a.pos) { |merged, part| merged.merge(part.pos) }
      end

      def hybrid
        c.hybrid rescue false
      end

      def details
        [b, c].inject(a.details) { |merged, part| merged.merge(part.details) }
      end
    }
    /
    a:genus space species_prefix? space b:species space_hard c:infraspecies_mult {
      def value
        "#{a.value} #{b.value} #{c.value}"
      end

      def canonical
        "#{a.canonical} #{b.canonical} #{c.canonical}"
      end

      def pos
        [b, c].inject(a.pos) { |merged, part| merged.merge(part.pos) }
      end

      def hybrid
        b.hybrid rescue false
      end

      def details
        [b, c].inject(a.details) { |merged, part| merged.merge(part.details) }
      end
    }
    /
    a:genus space species_prefix? space b:species {
      def value
        "#{a.value} #{b.value}"
      end

      def canonical
        "#{a.canonical} #{b.canonical}"
      end

      def pos
        genus_pos = a.pos
        genus_pos.merge(b.pos)
      end

      def hybrid
        b.hybrid rescue false
      end

      def details
        genus_details = a.details
        genus_details.merge(b.details)
      end
    }
  end
  
  # One or more infraspecific epithets (right-recursive list). The
  # :infraspecies detail entry is always an Array, one element per epithet.
  rule infraspecies_mult
    a:infraspecies space b:infraspecies_mult {
      def value
        "#{a.value} #{b.value}"
      end

      def canonical
        "#{a.canonical} #{b.canonical}"
      end

      def pos
        head_pos = a.pos
        head_pos.merge(b.pos)
      end

      # Concatenates the epithet entries of the head and of the rest of the
      # list, wrapping either side in an Array when it is not one already.
      def details
        head = a.details[:infraspecies]
        rest = b.details[:infraspecies]
        head = [head] unless head.class == Array
        rest = [rest] unless rest.class == Array
        a.details.merge({:infraspecies => head + rest})
      end
    }
    /
    infraspecies {
      # Wraps the single epithet entry in an Array.
      def details
        single = super
        {:infraspecies => [single[:infraspecies]]}
      end
    }
  end
  
  # A single infraspecific epithet, optionally followed by its authorship.
  rule infraspecies
    a:infraspecies_string space b:authorship {
      def value
        "#{a.value} #{b.value}"
      end

      def canonical
        a.canonical
      end

      def pos
        epithet_pos = a.pos
        epithet_pos.merge(b.pos)
      end

      # Authorship details are merged into the epithet entry.
      def details
        epithet = a.details[:infraspecies]
        {:infraspecies => epithet.merge(b.details)}
      end
    }
    /
    infraspecies_string 
  end
  
  # The epithet itself: either a rank marker followed by a species word,
  # or a bare species word that is not followed by a period.
  rule infraspecies_string
    sel:rank space_hard a:species_word {
      # Formatting is delegated to the rank node.
      def value
        sel.apply(a)
      end

      def canonical
        sel.canonical(a)
      end

      # Only the epithet word is recorded, not the rank marker.
      def pos
        word = a.interval
        {word.begin => ['infraspecies', word.end]}
      end

      def details
        sel.details(a)
      end
    }
    /
    species_word ![\.] {
      def value
        text_value
      end

      def canonical
        value
      end

      def pos
        span = interval
        {span.begin => ['infraspecies', span.end]}
      end

      # No rank marker was given, so the rank is reported as 'n/a'.
      def details
        epithet = {:string => value, :rank => 'n/a'}
        {:infraspecies => epithet}
      end
    }
  end
  
  # Taxon concept marker: "sec." or "sensu.". Both are rendered as "sec.".
  rule taxon_concept_rank
    ("sec."/"sensu.") {
      def value
        "sec."
      end

      # Marker followed by the value of the given node, with a leading space.
      def apply(a)
        " #{value} #{a.value}"
      end

      # Details of the given node, reported under :taxon_concept.
      def details(a = nil)
        concept = a.details
        {:taxon_concept => concept}
      end
    }
  end
  
  # Infraspecific rank markers. The literals form an ordered choice, so a
  # longer literal has to be listed before any literal that is its prefix
  # (for example "var." before "var"). Falls back to rank_forma.
  rule rank
    ("morph."/"f.sp."/"B"/"ssp."/"mut."/"nat"/"nothosubsp."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"var"/"subsp."/"subsp"/"subf."/"race"/"α" 
    /"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
    {
      # The matched marker with surrounding whitespace removed.
      def value
        text_value.strip
      end

      # Marker followed by the value of the given node, with a leading space.
      def apply(a)
        " " + text_value + " " + a.value
      end

      # Canonical form drops the marker and keeps only the given node's value.
      def canonical(a)
        " " + a.value
      end
      
      # Epithet entry; :string is nil when asking the node for its value fails
      # (for example when no node is passed).
      def details(a = nil)
        {:infraspecies => {:string => (a.value rescue nil), :rank => text_value}}
      end
    }
    /
    rank_forma
  end
  
  # Spellings of the rank "forma"; every spelling is rendered as "f.".
  rule rank_forma
    ("forma"/"form."/"form"/"fo."/"f.")
    {
      def value
        "f."
      end

      # Normalised marker followed by the value of the given node.
      def apply(a)
        " #{value} #{a.value}"
      end

      # Canonical form drops the marker.
      def canonical(a)
        " #{a.value}"
      end

      # Epithet entry; :string is nil when the node's value cannot be read.
      def details(a = nil)
        epithet = {:string => (a.value rescue nil), :rank => value}
        {:infraspecies => epithet}
      end
    }
  end
  
  # Species epithet, optionally followed by its authorship.
  rule species
    a:species_string space b:authorship {
      def value
        "#{a.value} #{b.value}"
      end

      def canonical
        a.canonical
      end

      def hybrid
        a.hybrid rescue false
      end

      def pos
        epithet_pos = a.pos
        epithet_pos.merge(b.pos)
      end

      # Authorship details are merged into the species entry.
      def details
        epithet = a.details[:species]
        {:species => epithet.merge(b.details)}
      end
    }
    /
    species_string
  end
  
  # The species epithet word itself: a plain species word, or a species
  # word marked as a hybrid (species_word_hybrid).
  rule species_string
    # Disabled alternative, kept for reference: a species word followed by
    # an author prefix word (lookahead only).
    # a:species_word &(space_hard author_prefix_word space_hard) {
    #   def value 
    #     a.value
    #   end
    #   
    #   def canonical
    #     a.value
    #   end
    #   
    #   def hybrid
    #     a.hybrid rescue false
    #   end
    # 
    #   def pos
    #     {a.interval.begin => ['species', a.interval.end]}
    #   end
    # 
    #   def details
    #     {:species => {:string => a.value}}
    #   end
    # }
    # /
    species_word {
      # value is not defined here; it comes from the species_word node.
      def canonical
        value
      end
      
      # Maps the start offset of the word to its type and end offset.
      def pos
        {interval.begin => ['species', interval.end]}
      end
      
      # A plain species word is never a hybrid.
      def hybrid
        false
      end
      
      def details
        {:species => {:string => value}}
      end
    }
    /
    species_word_hybrid
  end
  
  # Infrageneric name in parentheses: a capitalised latin word or a
  # capital letter followed by a period.
  rule infragenus
    left_paren space a:(cap_latin_word/capped_dotted_char) space right_paren {
      # Rendered without any inner padding.
      def value
        "(#{a.value})"
      end

      def canonical
        a.value
      end

      # Only the inner word is recorded, not the parentheses.
      def pos
        inner = a.interval
        {inner.begin => ['infragenus', inner.end]}
      end

      def details
        entry = {:string => a.value}
        {:infragenus => entry}
      end
    }
  end
  
  # Genus part of a multinomial name. The negative lookahead rejects a
  # capitalised word that is followed by an author prefix word and an
  # author word.
  rule genus
    a:uninomial_string !(space_hard author_prefix_word space_hard author_word) {
      def value
        a.value
      end

      def pos
        word = a.interval
        {word.begin => ['genus', word.end]}
      end

      def canonical
        a.value
      end

      def details
        entry = {:string => a.value}
        {:genus => entry}
      end
    }
  end
  
  # Single-word names, tried in order:
  #   1. word, infragenus, simple authorship
  #   2. word, infragenus
  #   3. word, authorship (whitespace between them is mandatory)
  #   4. bare word
  rule uninomial_name
    a:uninomial_string space b:infragenus space c:simple_authorship {
      def value
        "#{a.value} #{b.value} #{c.value}"
      end

      def canonical
        a.canonical
      end

      def pos
        [b, c].inject(a.pos) { |merged, part| merged.merge(part.pos) }
      end

      def hybrid
        false
      end

      # Infragenus and authorship details are nested inside the uninomial entry.
      def details
        entry = [b, c].inject(a.details[:uninomial]) { |merged, part| merged.merge(part.details) }
        {:uninomial => entry}
      end
    }
    /
    a:uninomial_string space b:infragenus {
      def value
        "#{a.value} #{b.value}"
      end

      def canonical
        a.canonical
      end

      def pos
        word_pos = a.pos
        word_pos.merge(b.pos)
      end

      def hybrid
        false
      end

      def details
        entry = a.details[:uninomial]
        {:uninomial => entry.merge(b.details)}
      end
    }
    /
    a:uninomial_string space_hard b:authorship {
      def value
        "#{a.value} #{b.value}"
      end

      def canonical
        a.canonical
      end

      def pos
        word_pos = a.pos
        word_pos.merge(b.pos)
      end

      def hybrid
        false
      end

      def details
        entry = a.details[:uninomial]
        {:uninomial => entry.merge(b.details)}
      end
    }
    /
    uninomial_string
  end

  # A capitalised latin word, or two of them joined by a hyphen
  # (the pair is tried first). value comes from the matched word node.
  rule uninomial_string
    (cap_latin_word_pair/cap_latin_word) {
      def canonical
        value
      end
      
      # Maps the start offset of the word to its type and end offset.
      def pos
        {interval.begin => ['uninomial', interval.end]}
      end
      
      def hybrid
        false
      end
      
      def details 
        {:uninomial => {:string => value}}
      end
    }
  end
  
  # Authorship forms, tried in order:
  #   1. parenthesised basionym authorship, combination authorship, ex authorship
  #   2. parenthesised basionym authorship, combination authorship
  #   3. parenthesised basionym authorship alone
  #   4. simple authorship followed by an ex authorship
  #   5. simple authorship alone
  rule authorship
    a:basionym_authorship_with_parenthesis space b:simple_authorship ","? space c:ex_authorship {
      def value
        "#{a.value} #{b.value} #{c.value}"
      end

      def pos
        [b, c].inject(a.pos) { |merged, part| merged.merge(part.pos) }
      end

      # The simple authorship is reported as the combination team; the ex
      # authorship is merged into that team.
      def details
        combination = b.details[:basionymAuthorTeam]
        combination.merge!(c.details)
        {
          :authorship => text_value.strip,
          :combinationAuthorTeam => combination,
          :basionymAuthorTeam => a.details[:basionymAuthorTeam]
        }
      end
    }
    /
    a:basionym_authorship_with_parenthesis space b:simple_authorship {
      def value
        "#{a.value} #{b.value}"
      end

      def pos
        basionym_pos = a.pos
        basionym_pos.merge(b.pos)
      end

      # The simple authorship is reported as the combination team.
      def details
        {
          :authorship => text_value.strip,
          :combinationAuthorTeam => b.details[:basionymAuthorTeam],
          :basionymAuthorTeam => a.details[:basionymAuthorTeam]
        }
      end
    }
    /
    basionym_authorship_with_parenthesis
    /
    a:simple_authorship ","? space b:ex_authorship {
      def value
        "#{a.value} #{b.value}"
      end

      def pos
        author_pos = a.pos
        author_pos.merge(b.pos)
      end

      # Starts from the simple authorship details, replaces the authorship
      # string with the full matched text and merges in the ex authorship.
      def details
        result = a.details
        result[:authorship] = text_value.strip
        result[:basionymAuthorTeam].merge!(b.details)
        result
      end
    }
    /
    simple_authorship
  end
   
  
  # Parenthesised (basionym) authorship, tried in order:
  #   1. (authors) followed by a year outside the parentheses
  #   2. (simple authorship ex authorship)
  #   3. (simple authorship)
  #   4. (?)
  rule basionym_authorship_with_parenthesis
    left_paren space a:authors_names space right_paren space [,]? space b:year {
      # The year is moved inside the parentheses in the rendered value.
      def value
        "(#{a.value} #{b.value})"
      end

      def pos
        names_pos = a.pos
        names_pos.merge(b.pos)
      end

      # NOTE(review): this branch uses the key :author_team while the other
      # rules in this grammar use :authorTeam - confirm whether consumers
      # rely on it before changing.
      def details
        team = {:author_team => text_value}.merge(a.details).merge(b.details)
        {:authorship => text_value, :basionymAuthorTeam => team}
      end
    }
    /
    left_paren space a:simple_authorship ","? space b:ex_authorship space right_paren {
      def value
        "(#{a.value} #{b.value})"
      end

      def pos
        author_pos = a.pos
        author_pos.merge(b.pos)
      end

      # Simple authorship details with the ex authorship merged into the team
      # and the authorship string replaced by the full matched text.
      def details
        result = a.details
        result[:basionymAuthorTeam].merge!(b.details)
        result[:authorship] = text_value.strip
        result
      end
    }
    /
    left_paren space a:simple_authorship space right_paren {
      def value
        "(#{a.value})"
      end

      def pos
        a.pos
      end

      # Simple authorship details with the authorship string replaced by
      # the full matched text, parentheses included.
      def details
        result = a.details
        result[:authorship] = text_value
        result
      end
    }
    /
    left_paren space a:"?" space right_paren {
      def value
        "(?)"
      end

      def pos
        mark = a.interval
        {mark.begin => ['unknown_author', mark.end]}
      end

      def details
        team = {:authorTeam => text_value, :author => ['?']}
        {:authorship => text_value, :basionymAuthorTeam => team}
      end
    }
  end
  
  # "ex" / "in" separator followed by a simple authorship.
  rule ex_authorship
    ex_sep space b:simple_authorship {
      # Always rendered with " ex ", whichever separator was matched.
      def value
        " ex #{b.value}"
      end

      def pos
        b.pos
      end

      # The team of the simple authorship, reported under :exAuthorTeam with
      # the trimmed authorship text as the default :authorTeam.
      def details
        team = {:authorTeam => b.text_value.strip}
        {:exAuthorTeam => team.merge(b.details[:basionymAuthorTeam])}
      end
    } 
  end

  # Authorship without parentheses, tried in order:
  #   1. authors, optional year, then "non" followed by other authors and a
  #      year; only the first authors and the first year are reported
  #   2. authors and a year
  #   3. authors only
  rule simple_authorship
    a:authors_names space [,]? space b:year? [,]? space "non" space authors_names space [,]? space year {
      # The year is optional in this form. When it is absent, b is an empty
      # node that has no value, pos or details methods, so every accessor
      # checks year_given? before touching it.
      def value
        year_given? ? a.value + " " + b.value : a.value
      end
      
      def pos
        year_given? ? a.pos.merge(b.pos) : a.pos
      end
      
      def details
        details_with_arg(:basionymAuthorTeam)
      end
      
      # Builds the details hash with the author team stored under the given
      # key (String or Symbol). Year details are merged in only when a year
      # was matched.
      def details_with_arg(authorTeamType = 'basionymAuthorTeam')
        team = {:authorTeam => a.text_value.strip}.merge(a.details)
        team = team.merge(b.details) if year_given?
        {:authorship => text_value, authorTeamType.to_sym => team}
      end

      # True when the optional first year matched some input.
      def year_given?
        !b.text_value.empty?
      end
    }
    /
    a:authors_names space [,]? space b:year {
      def value
        a.value + " " + b.value
      end
      
      def pos
        a.pos.merge(b.pos)
      end
      
      def details
        details_with_arg(:basionymAuthorTeam)
      end
      
      # Builds the details hash with the author team stored under the given
      # key (String or Symbol).
      def details_with_arg(authorTeamType = 'basionymAuthorTeam')
        { :authorship => text_value, 
          authorTeamType.to_sym => {
            :authorTeam => a.text_value.strip
          }.merge(a.details).merge(b.details)
        }
      end
    }
    /
    authors_names {
      # Merges the details of the underlying authors node (reached through
      # super) into the author team.
      def details
        details = details_with_arg(:basionymAuthorTeam)
        details[:basionymAuthorTeam].merge!(super)
        details
      end
      
      # Builds the details hash with the author team stored under the given
      # key (String or Symbol).
      def details_with_arg(authorTeamType = 'basionymAuthorTeam')
        { :authorship => text_value, 
          authorTeamType.to_sym => {
            :authorTeam => text_value,
          }
        }      
      end
    }
  end
  
  # A list of author names joined by separators (right-recursive), a single
  # author name, or an unknown-author marker. For the list form, the
  # separator node decides how value, pos and details are combined.
  rule authors_names
    a:author_name space sep:author_separator space b:authors_names {
      def value
        sep.apply(a,b)
      end
      
      def pos
        sep.pos(a,b)
      end
      
      def details
        sep.details(a,b)
      end
    }
    /
    author_name
    /
    unknown_auth
  end
  
  
  # Markers standing in for an unknown author. Dotted spellings are listed
  # before their undotted prefixes because the choice is ordered.
  rule unknown_auth
    ("auct."/"auct"/"hort."/"hort"/"anon."/"anon"/"ht."/"ht") {
      # The marker exactly as written in the input.
      def value
        text_value
      end
      
      # Maps the start offset of the marker to its type and end offset.
      def pos
       {interval.begin => ['unknown_author', interval.end]}
      end
      
      # Whatever marker was matched, the author is reported as "unknown".
      def details
        {:author => ["unknown"]}
      end
    }
  end
  
  # "ex" or "in", accepted only when the next character is whitespace
  # (checked by lookahead, not consumed).
  rule ex_sep
    ("ex"/"in") &[\s]
  end
  
  # Separator between two author names: "&", ",", "and" or "et".
  rule author_separator
    ("&"/","/"and"/"et") {
      # Joins the two values. "&", "and" and "et" are all rendered as " et";
      # a comma is kept as it is.
      def apply(a,b)
        matched = text_value.strip
        joiner = ["&","and","et"].include?(matched) ? " et" : matched
        "#{a.value}#{joiner} #{b.value}"
      end

      def pos(a,b)
        left = a.pos
        left.merge(b.pos)
      end

      # Concatenates the author lists of both sides.
      def details(a,b)
        authors = a.details[:author] + b.details[:author]
        {:author => authors}
      end
    }
  end

  # An author name, optionally followed by a postfix word such as "f.".
  rule author_name
    space a:author_name_without_postfix space b:author_postix_word {
      def value
        "#{a.value} #{b.value}"
      end

      def pos
        name_pos = a.pos
        name_pos.merge(b.pos)
      end

      # The name together with its postfix counts as a single author.
      def details
        full_name = value
        {:author => [full_name]}
      end
    }
    /
    author_name_without_postfix
  end
  
  # An author name built from words: a prefix word followed by a name, an
  # author word followed by a name, or a single author word. Whatever the
  # number of words, the result is reported as one author.
  rule author_name_without_postfix
    space a:author_prefix_word space b:author_name {
      def value
        "#{a.value} #{b.value}"
      end

      def pos
        prefix_pos = a.pos
        prefix_pos.merge(b.pos)
      end

      def details
        full_name = value
        {:author => [full_name]}
      end
    }
    /
    a:author_word space b:author_name {
      def value
        "#{a.value} #{b.value}"
      end

      def pos
        word_pos = a.pos
        word_pos.merge(b.pos)
      end

      def details
        full_name = value
        {:author => [full_name]}
      end
    }
    /
    author_word
  end
  
  # A single word of an author name, tried in order:
  #   1. the literal "A S. Xu"
  #   2. "arg." and the "et al" variants
  #   3. a word starting with one of the listed capitals (X excluded),
  #      followed by any characters other than digits, brackets,
  #      parentheses, whitespace, "&" and ","
  #   4. a word starting with "X" followed by at least one such character
  #   5. an author prefix word
  rule author_word
    "A S. Xu" {
      def value
        text_value.strip
      end
      
      # Three words are recorded: "A", "S." and "Xu". Each entry maps the
      # start offset of a word to its absolute end offset, as every other pos
      # method in this grammar does (previously the word lengths were stored
      # in place of the end offsets).
      def pos
        start = interval.begin
        {start => ['author_word', start + 1], (start + 2) => ['author_word', start + 4], (start + 5) => ['author_word', start + 7]}
      end
      
      def details
        {:author => [value]}
      end
    }
    /
    ("arg."/"et al.\{\?\}"/"et al."/"et al") {
      def value
        text_value.strip
      end
      
      def pos
        #cheating because there are several words in some of them
        {interval.begin => ['author_word', interval.end]}
      end
      
      def details
        {:author => [value]}
      end
    }
    / 
    ("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-W]/[Y-Z]) [^0-9\[\]\(\)\s&,]* {
      def value
        text_value
      end
      
      def pos
        {interval.begin => ['author_word', interval.end]}
      end
      
      def details
        {:author => [value]}
      end
    }
    /
    "X" [^0-9\[\]\(\)\s&,]+ {
      def value
        text_value
      end
      
      def pos
        {interval.begin => ['author_word', interval.end]}
      end
      
      def details
        {:author => [value]}
      end
    }
    /
    author_prefix_word
  end
  
  # Lower-case particles of author names ("van", "de", ...). Optional
  # leading whitespace is part of the match, and the particle must be
  # followed by whitespace (lookahead only). Longer literals come before
  # their prefixes ("della"/"dela" before "de", "der"/"des"/"den" after "da").
  # NOTE(review): no details method is defined here, unlike the other
  # author_word alternatives - confirm no caller needs one.
  rule author_prefix_word
    space ("ab"/"bis"/"da"/"der"/"des"/"den"/"della"/"dela"/"de"/"di"/"du"/"la"/"ter"/"van"/"von") &space_hard {
      # Matched text as is, including any leading whitespace.
      def value
        text_value
      end
      
      def pos
        #cheating because there are several words in some of them
        {interval.begin => ['author_word', interval.end]}
      end
    }
  end

  # Postfix of an author name: "f." or "filius", with optional whitespace
  # on both sides included in the match. The rule name is spelled "postix"
  # and is referenced under that name by author_name.
  rule author_postix_word
    space ("f."/"filius") space {
      # Matched text with the surrounding whitespace removed.
      def value 
        text_value.strip
      end

      # The recorded span covers the whole match, whitespace included.
      def pos
        {interval.begin => ['author_word', interval.end]}
      end
    }
  end
  
  # Two capitalised latin words joined by a hyphen.
  rule cap_latin_word_pair
    a:cap_latin_word "-" b:cap_latin_word {
      # The hyphen is dropped and the second word is lower-cased before
      # being appended to the first.
      def value
        a.value + b.value.downcase
      end
    }
  end
  
  # A capitalised latin word, tried in order:
  #   1. capital (or capital digraph) plus latin word, followed by "?"
  #   2. capital (or capital digraph) plus latin word
  #   3. "AE" / "OE" plus latin word
  #   4. one of the listed two-letter words
  rule cap_latin_word
    a:([A-Z]/cap_digraph) b:latin_word "?" {
      # The trailing "?" is matched but not included in the value. A plain
      # capital has no value method, so its text is used instead.
      def value
        (a.value rescue a.text_value) + b.value
      end
    }
    /
    a:([A-Z]/cap_digraph) b:latin_word {
      def value
        (a.value rescue a.text_value) + b.value
      end
    }
    /
    a:("AE"/"OE") b:latin_word {
      # Keeps the first capital and lower-cases the "E".
      def value
        a.text_value[0..0] + 'e' + b.value
      end
    }
    /
    # Two-letter words listed explicitly; the duplicated "Io" entry was
    # removed because a repeated literal in an ordered choice can never match.
    ("Ca"/"Ea"/"Ge"/"Ia"/"Io"/"Ix"/"Lo"/"Oa"/"Ra"/"Ty"/"Ua"/"Aa"/"Ja"/"Zu"/"La"/"Qu"/"As"/"Ba") {
      def value
        text_value
      end
    }
  end

  # A single capital letter followed by a period, e.g. "A.".
  rule capped_dotted_char
    [A-Z] "." {
      # The letter and the period exactly as matched.
      def value
        text_value
      end
    }
  end

  # A species word marked as a hybrid. The marker may be a multiplication
  # sign, a capital "X", or a lower-case "x" that must be followed by
  # whitespace. In every case the marker is rendered as the multiplication
  # sign and is left out of canonical, pos and details.
  rule species_word_hybrid
    a:multiplication_sign space b:species_word {
      def value
        "#{a.value} #{b.value}"
      end

      def canonical
        b.value
      end

      def hybrid
        true
      end

      def pos
        word = b.interval
        {word.begin => ['species', word.end]}
      end

      def details
        entry = {:string => b.value}
        {:species => entry}
      end
    }
    /
    a:"X" space b:species_word {
      def value
        "× #{b.value}"
      end

      def canonical
        b.value
      end

      def hybrid
        true
      end

      def pos
        word = b.interval
        {word.begin => ['species', word.end]}
      end

      def details
        entry = {:string => b.value}
        {:species => entry}
      end
    }
    /
    a:"x" space_hard b:species_word {
      def value
        "× #{b.value}"
      end

      def canonical
        b.value
      end

      def hybrid
        true
      end

      def pos
        word = b.interval
        {word.begin => ['species', word.end]}
      end

      def details
        entry = {:string => b.value}
        {:species => entry}
      end
    }
  end

  # Qualifier that may precede a species epithet: "aff.", "corrig." or "?".
  # It must be followed by whitespace (lookahead only, not consumed).
  rule species_prefix
    ("aff."/"corrig."/"?") &space_hard
  end
  
  # A species epithet word: either digits, an optional hyphen and a latin
  # word, or a plain latin word.
  rule species_word
    a:[0-9]+ "-"? b:latin_word {
      # Numbers present in the table are spelled out as a latin prefix and
      # glued to the word; any other number is kept as digits followed by a
      # hyphen.
      def value
        num = {
          "1" => "uni", "2" => "du", "3" => "tri", "4" => "quadri",
          "5" => "quinque", "6" => "hexa", "7" => "septem", "8" => "octo",
          "9" => "novem", "10" => "decem", "11" => "undecim", "12" => "duodec",
          "13" => "tredec", "14" => "quattuordec", "15" => "quinquadec",
          "16" => "hexadec", "17" => "septendec", "18" => "octodec",
          "19" => "novemdec", "20" => "viginti", "21" => "unviginti",
          "22" => "duodeviginti", "23" => "triviginti",
          "24" => "quattuorviginti", "25" => "quinquatviginti",
          "26" => "hexaviginti", "27" => "septenviginti",
          "28" => "octoviginti", "29" => "novemviginti", "30" => "triginta",
          "38" => "trigintaocto", "100" => "centi"
        }
        digits = a.text_value
        prefix = num[digits] || (digits + "-")
        prefix + b.value
      end
    }
    /
    latin_word
  end

  # A lower-case latin word: either hyphen-separated parts (right-recursive),
  # or one letter followed by one or more letters, so a word without a
  # hyphen has at least two letters.
  rule latin_word
    a:valid_name_letters "-" b:latin_word {
      # The hyphen is kept in the value.
      def value
        a.value + "-" + b.value
      end
    }
    /
    a:valid_name_letter b:valid_name_letters {
      def value
        a.value + b.value
      end
     }
  end

  # One or more lower-case name letters.
  rule valid_name_letters
    [a-zëæœ]+ {
      # Expands the ligatures to their two-letter spelling, character by
      # character. The letter with diaeresis is kept as it is: it is legal in
      # the botanical code, although normalising it might help reconciliation.
      def value
        expanded = text_value.split('').map do |letter|
          case letter
          when 'æ' then 'ae'
          when 'œ' then 'oe'
          else letter
          end
        end
        expanded.inject('') { |result, piece| result << piece }
      end
    }
  end

  # A single lower-case name letter.
  rule valid_name_letter
    [a-zëæœ] {
      # Ligatures are expanded to their two-letter spelling; any other
      # letter is returned unchanged.
      def value
        case text_value
        when 'æ' then 'ae'
        when 'œ' then 'oe'
        else text_value
        end
      end
    }
  end


  # Capital ligatures, rendered as a capital followed by a lower-case "e".
  rule cap_digraph
    "Æ" {
      def value
        'Ae'
      end
    }
    /
    "Œ" {
      def value
        'Oe'
      end
    }
  end

  # A year, optionally wrapped in parentheses. A year followed by a letter
  # is tried before a bare year number.
  rule year
    b:left_paren space a:(year_number_with_character/year_number) space c:right_paren {
      # The parentheses are dropped; everything is taken from the inner year.
      def value
        a.value
      end
      
      def pos
        a.pos
      end
      
      def details
        a.details
      end 
    }
    /
    year_number_with_character
    /
    year_number
  end

  # A year number immediately followed by a single letter, e.g. "1887a".
  rule year_number_with_character
    a:year_number [a-zA-Z] {
      # Only the year number is returned; the trailing letter is dropped.
      def value
        a.text_value
      end

      # The recorded span covers the whole match, trailing letter included.
      def pos
        {interval.begin => ['year', interval.end]}
      end

      def details
        {:year => value}
      end
    }
  end
  
  # A year number: first digit 1 or 2, second digit 7, 8, 9 or 0, then one
  # or two more digits, optionally followed by a question mark.
  rule year_number
    [12] [7890] [0-9] [0-9]? [\?]? {
      # The matched text, including the question mark when present.
      def value
        text_value
      end
      
      # Maps the start offset of the year to its type and end offset.
      def pos
        {interval.begin => ['year', interval.end]}
      end
      
      def details
        {:year => value}
      end
    }
  end
  
  # Opening parenthesis.
  rule left_paren
    "("
  end
  
  # Closing parenthesis.
  rule right_paren
    ")"
  end
  
  # Hybrid marker: the letter "x" in either case, or anything accepted by
  # multiplication_sign.
  rule hybrid_character
    ("x"/"X") {
      # The letter is rendered as the multiplication sign.
      def value
        "×"
      end
    }
    /
    multiplication_sign
  end
  
  # The multiplication sign or an asterisk.
  rule multiplication_sign
    ("×"/"*") {
      # Both spellings are rendered as the multiplication sign.
      def value
        "×"
      end
    }
  end
  
  # Optional whitespace: zero or more whitespace characters.
  rule space
    [\s]*
  end

  # Mandatory whitespace: one or more whitespace characters.
  rule space_hard
    [\s]+
  end
  
end