# -*- encoding : utf-8 -*-

module PragmaticSegmenter
  # Searches a text for list markers and inserts a carriage return ("\r")
  # in front of each list item it recognises.
  #
  # Four kinds of list are handled, in this order (see #add_line_break):
  #   1. alphabetical lists         ("a. b. c." / "a) b) c)" / "(a) (b)")
  #   2. roman numeral lists        ("i. ii. iii." / "i) ii) iii)")
  #   3. numbered lists with period ("1. 2. 3.")
  #   4. numbered lists with parens ("1) 2) 3)")
  #
  # A marker is only treated as a list item when a neighbouring marker in
  # the text is the adjacent value in the sequence.
  #
  # Placeholder characters used while processing:
  #   ♨  stands in for the period after a list number; replaced by ∯
  #      through SubstituteListPeriodRule
  #   ☝  marks a list number that is followed by ")"; removed through
  #      ListMarkerRule
  #   ∯  replaces the period of a list item
  #
  # NOTE(review): Rule and Text are defined elsewhere in the project; this
  # class assumes Text behaves like a mutable String (scan, gsub!, include?,
  # !~) and offers #apply(rule) - confirm against their definitions.
  class List
    # Rubular: http://rubular.com/r/XcpaJKH0sz
    ALPHABETICAL_LIST_WITH_PERIODS =
      /(?<=^)[a-z](?=\.)|(?<=\A)[a-z](?=\.)|(?<=\s)[a-z](?=\.)/

    # Rubular: http://rubular.com/r/Gu5rQapywf
    ALPHABETICAL_LIST_WITH_PARENS =
      /(?<=\()[a-z]+(?=\))|(?<=^)[a-z]+(?=\))|(?<=\A)[a-z]+(?=\))|(?<=\s)[a-z]+(?=\))/i

    SubstituteListPeriodRule = Rule.new(/♨/, '∯')
    ListMarkerRule = Rule.new(/☝/, '')

    # Rubular: http://rubular.com/r/Wv4qLdoPx7
    SpaceBetweenListItemsFirstRule = Rule.new(/(?<=\S\S|^)\s(?=\S\s*\d{1,2}♨)/, "\r")

    # Rubular: http://rubular.com/r/AizHXC6HxK
    SpaceBetweenListItemsSecondRule = Rule.new(/(?<=\S\S|^)\s(?=\d{1,2}♨)/, "\r")

    # Rubular: http://rubular.com/r/GE5q6yID2j
    SpaceBetweenListItemsThirdRule = Rule.new(/(?<=\S\S|^)\s(?=\d{1,2}☝)/, "\r")

    # Matches the digits (optionally with one leading whitespace character)
    # of a candidate list number that is followed by "." and then whitespace
    # or ")". Used to collect the sequence of numbers present in the text.
    NUMBERED_LIST_REGEX_1 =
      /\s\d{1,2}(?=\.\s)|^\d{1,2}(?=\.\s)|\s\d{1,2}(?=\.\))|^\d{1,2}(?=\.\))|(?<=\s\-)\d{1,2}(?=\.\s)|(?<=^\-)\d{1,2}(?=\.\s)|(?<=\s\⁃)\d{1,2}(?=\.\s)|(?<=^\⁃)\d{1,2}(?=\.\s)|(?<=\s\-)\d{1,2}(?=\.\))|(?<=^\-)\d{1,2}(?=\.\))|(?<=\s\⁃)\d{1,2}(?=\.\))|(?<=^\⁃)\d{1,2}(?=\.\))/

    # Matches the same candidates as NUMBERED_LIST_REGEX_1 but includes the
    # trailing period in the match, so the period can be substituted.
    NUMBERED_LIST_REGEX_2 =
      /(?<=\s)\d{1,2}\.(?=\s)|^\d{1,2}\.(?=\s)|(?<=\s)\d{1,2}\.(?=\))|^\d{1,2}\.(?=\))|(?<=\s\-)\d{1,2}\.(?=\s)|(?<=^\-)\d{1,2}\.(?=\s)|(?<=\s\⁃)\d{1,2}\.(?=\s)|(?<=^\⁃)\d{1,2}\.(?=\s)|(?<=\s\-)\d{1,2}\.(?=\))|(?<=^\-)\d{1,2}\.(?=\))|(?<=\s\⁃)\d{1,2}\.(?=\))|(?<=^\⁃)\d{1,2}\.(?=\))/

    # One or two digits directly followed by ")" and whitespace.
    NUMBERED_LIST_PARENS_REGEX = /\d{1,2}(?=\)\s)/

    # Rubular: http://rubular.com/r/NsNFSqrNvJ
    EXTRACT_ALPHABETICAL_LIST_LETTERS_REGEX =
      /\([a-z]+(?=\))|(?<=^)[a-z]+(?=\))|(?<=\A)[a-z]+(?=\))|(?<=\s)[a-z]+(?=\))/i

    # Rubular: http://rubular.com/r/wMpnVedEIb
    ALPHABETICAL_LIST_LETTERS_AND_PERIODS_REGEX =
      /(?<=^)[a-z]\.|(?<=\A)[a-z]\.|(?<=\s)[a-z]\./i

    # Ordered marker sequences. The position of a marker inside its sequence
    # decides whether two markers are neighbours, so every entry must be
    # unique and in ascending order.
    LATIN_ALPHABET = ('a'..'z').to_a.freeze
    ROMAN_NUMERALS =
      %w(i ii iii iv v vi vii viii ix x xi xii xiii xiv xv xvi xvii xviii xix xx).freeze

    attr_reader :text

    # text - the String to search for lists; it is wrapped in a Text
    #        instance, which is then modified in place by the private
    #        helpers below.
    def initialize(text:)
      @text = Text.new(text)
    end

    # Runs all list detectors over the text, one after the other.
    #
    # Returns the result of the final Text#apply call, i.e. the text with
    # "\r" inserted before recognised list items.
    def add_line_break
      formatted_text = format_alphabetical_lists(text)
      formatted_text = format_roman_numeral_lists(formatted_text)
      formatted_text = format_numbered_list_with_periods(formatted_text)
      format_numbered_list_with_parens(formatted_text)
    end

    private

    # Marks "1)"-style numbers with ☝, breaks the line before them and
    # finally strips the ☝ markers again.
    def format_numbered_list_with_parens(txt)
      new_txt = replace_parens_in_numbered_list(txt)
      new_txt = add_line_breaks_for_numbered_list_with_parens(new_txt)
      new_txt.apply(ListMarkerRule)
    end

    # Swaps the period of "1."-style numbers for ♨, breaks the line before
    # them and finally turns every ♨ into ∯.
    def format_numbered_list_with_periods(txt)
      new_txt = replace_periods_in_numbered_list(txt)
      new_txt = add_line_breaks_for_numbered_list_with_periods(new_txt)
      new_txt.apply(SubstituteListPeriodRule)
    end

    # Handles "a." / "a)" lists using the latin alphabet as the sequence.
    def format_alphabetical_lists(txt)
      new_txt = add_line_breaks_for_alphabetical_list_with_periods(txt, false)
      add_line_breaks_for_alphabetical_list_with_parens(new_txt, false)
    end

    # Handles "i." / "i)" lists using roman numerals as the sequence.
    def format_roman_numeral_lists(txt)
      new_txt = add_line_breaks_for_alphabetical_list_with_periods(txt, true)
      add_line_breaks_for_alphabetical_list_with_parens(new_txt, true)
    end

    def replace_periods_in_numbered_list(txt)
      scan_lists(NUMBERED_LIST_REGEX_1, NUMBERED_LIST_REGEX_2, '♨', true, txt)
    end

    # Inserts "\r" before ♨-marked numbers, unless the text contains no
    # marker, already has a line break between two markers, or matches the
    # "for <number>. <lowercase letter>" pattern.
    def add_line_breaks_for_numbered_list_with_periods(txt)
      return txt unless txt.include?('♨') &&
                        txt !~ /♨.+\n.+♨|♨.+\r.+♨/ &&
                        txt !~ /for\s\d{1,2}♨\s[a-z]/
      txt.apply(SpaceBetweenListItemsFirstRule).
          apply(SpaceBetweenListItemsSecondRule)
    end

    def replace_parens_in_numbered_list(txt)
      scan_lists(
        NUMBERED_LIST_PARENS_REGEX, NUMBERED_LIST_PARENS_REGEX, '☝', false, txt)
    end

    # Inserts "\r" before ☝-marked numbers, unless the text contains no
    # marker or already has a line break between two markers.
    def add_line_breaks_for_numbered_list_with_parens(txt)
      return txt unless txt.include?('☝') && txt !~ /☝.+\n.+☝|☝.+\r.+☝/
      txt.apply(SpaceBetweenListItemsThirdRule)
    end

    # Collects every number matched by regex1 and, for each number that has
    # a sequential neighbour, marks its occurrences in txt (see
    # #substitute_found_list_items).
    #
    # A number counts as part of a list when the next match is number + 1,
    # the previous match is number - 1, or the pair rolls over between
    # 9 and 0.
    #
    # NOTE: for the first match, `numbers[i - 1]` is `numbers[-1]`, i.e. the
    # last match in the text. This wrap-around is kept as-is because the
    # segmentation results depend on it.
    #
    # regex1      - Regexp that extracts the numbers
    # regex2      - Regexp handed to gsub! for the substitution
    # replacement - String appended to a matching number
    # strip       - whether matches of regex2 carry a trailing character
    #               that has to be removed before comparing
    # txt         - the text, modified in place
    #
    # Returns txt.
    def scan_lists(regex1, regex2, replacement, strip, txt)
      numbers = txt.scan(regex1).map(&:to_i)
      numbers.each_with_index do |number, i|
        preceding = numbers[i - 1]
        following = numbers[i + 1]
        next unless (number + 1).eql?(following) ||
                    (number - 1).eql?(preceding) ||
                    (number.eql?(0) && preceding.eql?(9)) ||
                    (number.eql?(9) && following.eql?(0))
        substitute_found_list_items(txt, regex2, number, strip, replacement)
      end
      txt
    end

    # Rewrites, in place, every match of regex whose number equals `a` to
    # "<a><replacement>"; all other matches are left untouched.
    #
    # When strip is true the match is stripped and its last character
    # (the period) is dropped before it is compared with `a`.
    def substitute_found_list_items(txt, regex, a, strip, replacement)
      number = a.to_s
      txt.gsub!(regex) do |m|
        candidate = strip ? m.strip.chop : m
        number.eql?(candidate) ? "#{number}#{replacement}" : m
      end
    end

    def add_line_breaks_for_alphabetical_list_with_periods(txt, roman_numeral)
      iterate_alphabet_array(ALPHABETICAL_LIST_WITH_PERIODS, false, txt, roman_numeral)
    end

    def add_line_breaks_for_alphabetical_list_with_parens(txt, roman_numeral)
      iterate_alphabet_array(ALPHABETICAL_LIST_WITH_PARENS, true, txt, roman_numeral)
    end

    # Rewrites, in place, every "<letter>." whose letter equals `a` to
    # "\r<letter>∯". The comparison is case-sensitive, so only the
    # lower-case form of the marker is replaced.
    def replace_alphabet_list(a, txt)
      txt.gsub!(ALPHABETICAL_LIST_LETTERS_AND_PERIODS_REGEX) do |m|
        a.eql?(m.chomp('.')) ? "\r#{a}∯" : m
      end
    end

    # Puts "\r" in front of every "<letters>)" marker whose lower-cased
    # letters equal `a`. For the "(<letters>)" form the opening paren is
    # replaced by "\r&✂&".
    # NOTE(review): "&✂&" is presumably resolved by a later processing
    # stage outside this file - confirm.
    def replace_alphabet_list_parens(a, txt)
      txt.gsub!(EXTRACT_ALPHABETICAL_LIST_LETTERS_REGEX) do |m|
        if m.include?('(')
          letters = m.delete('(')
          a.eql?(letters.downcase) ? "\r&✂&#{letters}" : m
        else
          a.eql?(m.downcase) ? "\r#{m}" : m
        end
      end
    end

    def replace_correct_alphabet_list(a, txt, parens)
      if parens
        replace_alphabet_list_parens(a, txt)
      else
        replace_alphabet_list(a, txt)
      end
    end

    # Handles the last marker found in the text: it is replaced only when
    # the marker before it is its direct neighbour in `alphabet`.
    def last_array_item_replacement(a, i, alphabet, list_array, txt, parens)
      return if (alphabet & list_array).empty? ||
        !alphabet.include?(list_array[i - 1]) ||
        !alphabet.include?(a)
      return if (alphabet.index(list_array[i - 1]) - alphabet.index(a)).abs != 1
      replace_correct_alphabet_list(a, txt, parens)
    end

    # Handles every marker except the last one: it is replaced when the
    # following marker is its successor in `alphabet` or the preceding
    # marker is a direct neighbour.
    #
    # NOTE: for i == 0, `list_array[i - 1]` refers to the last marker
    # (negative index); this wrap-around is kept as-is.
    def other_items_replacement(a, i, alphabet, list_array, txt, parens)
      return if (alphabet & list_array).empty? ||
        !alphabet.include?(list_array[i - 1]) ||
        !alphabet.include?(a) ||
        !alphabet.include?(list_array[i + 1])
      return if alphabet.index(list_array[i + 1]) - alphabet.index(a) != 1 &&
                (alphabet.index(list_array[i - 1]) - alphabet.index(a)).abs != 1
      replace_correct_alphabet_list(a, txt, parens)
    end

    # Scans txt for markers matching regex, keeps the ones that belong to
    # the chosen sequence and replaces those that sit next to a neighbouring
    # marker.
    #
    # regex         - Regexp extracting the candidate markers
    # parens        - true for the "a)" form, false for the "a." form
    # txt           - the text, modified in place
    # roman_numeral - true to use roman numerals, false for a-z
    #
    # Returns txt.
    def iterate_alphabet_array(regex, parens, txt, roman_numeral)
      alphabet = roman_numeral ? ROMAN_NUMERALS : LATIN_ALPHABET
      list_array = txt.scan(regex).map(&:downcase)
      # Drop candidates that are not part of the sequence at all.
      list_array.keep_if { |item| alphabet.include?(item) }
      last_index = list_array.length - 1
      list_array.each_with_index do |a, i|
        if i.eql?(last_index)
          last_array_item_replacement(a, i, alphabet, list_array, txt, parens)
        else
          other_items_replacement(a, i, alphabet, list_array, txt, parens)
        end
      end
      txt
    end
  end
end