# frozen_string_literal: true

module JekyllImport
  module Util
    # Ruby translation of wordpress wpautop (see https://core.trac.wordpress.org/browser/trunk/src/wp-includes/formatting.php)
    #
    # A group of regex replaces used to identify text formatted with newlines and
    # replace double line-breaks with HTML paragraph tags. The remaining
    # line-breaks after conversion become <<br />> tags, unless $br is set to false
    #
    # @param string pee The text which has to be formatted.
    # @param bool br Optional. If set, this will convert all remaining line-breaks after paragraphing. Default true.
    # @return string Text which has been converted into correct paragraph tags.
    #
    def self.wpautop(pee, br = true)
      return "" if pee.strip == ""

      allblocks = "(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|option|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|noscript|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)"
      pre_tags = {}
      pee += "\n"

      if pee.include?("<pre")
        pee_parts = pee.split("</pre>")
        last_pee = pee_parts.pop
        pee = ""
        pee_parts.each_with_index do |pee_part, i|
          start = pee_part.index("<pre")

          unless start
            pee += pee_part
            next
          end

          name = "<pre wp-pre-tag-#{i}></pre>"
          pre_tags[name] = (pee_part[start..-1] + "</pre>").gsub('\\', '\\\\\\\\')

          pee += pee_part[0, start] + name
        end
        pee += last_pee
      end

      pee = pee.gsub(Regexp.new('<br />\s*<br />'), "\n\n")
      pee = pee.gsub(Regexp.new("(<" + allblocks + "[^>]*>)"), "\n\\1")
      pee = pee.gsub(Regexp.new("(</" + allblocks + ">)"), "\\1\n\n")
      pee = pee.gsub("\r\n", "\n").tr("\r", "\n")
      if pee.include? "<object"
        pee = pee.gsub(Regexp.new('\s*<param([^>]*)>\s*'), "<param\\1>")
        pee = pee.gsub(Regexp.new('\s*</embed>\s*'), "</embed>")
      end

      pees = pee.split(%r!\n\s*\n!).compact
      pee = ""
      pees.each { |tinkle| pee += "<p>" + tinkle.chomp("\n") + "</p>\n" }
      pee = pee.gsub(Regexp.new('<p>\s*</p>'), "")
      pee = pee.gsub(Regexp.new("<p>([^<]+)</(div|address|form)>"), "<p>\\1</p></\\2>")
      pee = pee.gsub(Regexp.new('<p>\s*(</?' + allblocks + '[^>]*>)\s*</p>'), "\\1")
      pee = pee.gsub(Regexp.new("<p>(<li.+?)</p>"), "\\1")
      pee = pee.gsub(Regexp.new("<p><blockquote([^>]*)>", "i"), "<blockquote\\1><p>")
      pee = pee.gsub("</blockquote></p>", "</p></blockquote>")
      pee = pee.gsub(Regexp.new('<p>\s*(</?' + allblocks + "[^>]*>)"), "\\1")
      pee = pee.gsub(Regexp.new("(</?" + allblocks + '[^>]*>)\s*</p>'), "\\1")
      if br
        pee = pee.gsub(Regexp.new('<(script|style).*?</\1>')) { |match| match.gsub("\n", "<WPPreserveNewline />") }
        pee = pee.gsub(Regexp.new('(?<!<br />)\s*\n'), "<br />\n")
        pee = pee.gsub("<WPPreserveNewline />", "\n")
      end
      pee = pee.gsub(Regexp.new("(</?" + allblocks + '[^>]*>)\s*<br />'), "\\1")
      pee = pee.gsub(Regexp.new('<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)'), "\\1")
      pee = pee.gsub(Regexp.new('\n</p>$'), "</p>")

      pre_tags.each do |name, value|
        pee.gsub!(name, value)
      end
      pee
    end
  end
end