require 'cgi'
require 'uri'

# :main: TracWiki

# TracWiki parses Trac-formatted wiki text and translates it into
# XHTML. Trac wiki markup is a lightweight markup syntax similar to
# what many WikiWikiWebs use. Example syntax:
#
# = Heading 1 =
# == Heading 2 ==
# === Heading 3 ===
# **Bold text**
# ''Italic text''
# [[Links]]
# ||=Table||=Heading||
# || Table|| Cells ||
# [[Image(image.png)]]
#
# The simplest interface is TracWiki.render. The default handling of
# links allows explicit local links using the [[link]] syntax. External
# links will only be allowed if specified using http(s) and ftp(s)
# schemes. If special link handling is needed, such as inter-wiki or
# hierarchical local links, you must inherit TracWiki::Parser and
# override make_local_link.
#
# You can customize the created image markup by overriding
# make_image.

# Main TracWiki parser class. Call TracWiki::Parser#to_html to convert
# TracWiki formatted text to HTML.
#
# This class is not reentrant. A separate instance is needed for
# each thread that needs to convert TracWiki text to HTML.
#
# Inherit this to provide custom handling of links. The overridable
# methods are: make_local_link
module TracWiki
  # Converts Trac-style wiki markup into an HTML fragment.
  #
  # Usage:
  #
  #   TracWiki::Parser.new("**Hello ''World''**").to_html
  #   #=> "<p><strong>Hello <em>World</em></strong></p>"
  #
  # The parser keeps its working state (output buffer, open-tag stack) in
  # instance variables, so one instance must not be shared between threads.
  #
  # Subclasses can customise link and image handling by overriding
  # make_local_link, make_direct_link, make_image_link and make_image.
  class Parser

    # Allowed url schemes for explicit links ([[http://...]]).
    # Examples: http https ftp ftps
    attr_accessor :allowed_schemes

    # Non-standard wiki text extensions enabled?
    # E.g. underlined, deleted text etc.
    # The flag is only stored here; the parsing code in this class does
    # not consult it.
    attr_writer :extensions
    def extensions?; @extensions; end

    # Disable url escaping for local links
    # Escaping:    [[/Test]] --> %2FTest
    # No escaping: [[/Test]] --> /Test
    attr_writer :no_escape
    def no_escape?; @no_escape; end

    # Create a new Parser instance.
    #
    # text    - the wiki source to convert.
    # options - Hash of option name => value. Each pair is applied through
    #           the matching public setter (allowed_schemes=, extensions=,
    #           no_escape=); an unknown or non-public name raises
    #           NoMethodError.
    def initialize(text, options = {})
      @allowed_schemes = %w(http https ftp ftps)
      @text = text
      @extensions = @no_escape = nil
      # public_send keeps option keys from reaching protected/private methods.
      options.each_pair { |k, v| public_send("#{k}=", v) }
    end

    # Convert the wiki text to HTML and return the result. The resulting
    # HTML does not contain <html> and <body> tags.
    #
    # Example:
    #
    #   parser = TracWiki::Parser.new("**Hello ''World''**")
    #   parser.to_html
    #   #=> "<p><strong>Hello <em>World</em></strong></p>"
    def to_html
      @out = ''.dup   # mutable output buffer
      @p = false      # true while a <p> is open
      @stack = []     # names of currently open tags, innermost last
      @stacki = []    # indentation recorded for each entry of @stack
      parse_block(@text)
      @out
    end

    protected

    # Escape any characters with special meaning in HTML using HTML
    # entities.
    def escape_html(string)
      CGI::escapeHTML(string)
    end

    # Escape any characters with special meaning in URLs using URL
    # encoding.
    def escape_url(string)
      CGI::escape(string)
    end

    # Open +tag+ (with the literal attribute string +args+), and record it
    # on the tag stack together with its indentation. When +lindent+ is
    # nil the indentation of the enclosing tag is inherited (-1 at the top
    # level). The pseudo tag 'strongem' emits <strong><em>.
    def start_tag(tag, args = '', lindent = nil)
      lindent = @stacki.last || -1 if lindent.nil?

      @stack.push(tag)
      @stacki.push(lindent)

      if tag == 'strongem'
        @out << '<strong><em>'
      else
        @out << '<' << tag << args << '>'
      end
    end

    # Close the innermost open tag and drop it from both stacks.
    def end_tag
      tag = @stack.pop
      @stacki.pop
      if tag == 'strongem'
        @out << '</em></strong>'
      else
        @out << "</#{tag}>"
      end
    end

    # Open +tag+ if it is not open yet; close it if it is the innermost
    # open tag. If it is open but not innermost (mis-nested markup) the
    # markup text +match+ is emitted literally instead.
    def toggle_tag(tag, match)
      if @stack.include?(tag)
        if @stack.last == tag
          end_tag
        else
          @out << escape_html(match)
        end
      else
        start_tag(tag)
      end
    end

    # Close every open tag and leave paragraph mode.
    def end_paragraph
      end_tag until @stack.empty?
      @p = false
    end

    # Continue the current paragraph (separating lines by one space) or
    # close whatever is open and start a new <p>.
    def start_paragraph
      if @p
        @out << ' ' if @out[-1] != ?\s
      else
        end_paragraph
        start_tag('p')
        @p = true
      end
    end

    # Translate an explicit local link to a desired URL that is
    # properly URL-escaped. The default behaviour is to convert local
    # links directly, escaping any characters that have special
    # meaning in URLs (unless no_escape is set). Relative URLs in local
    # links are not handled.
    #
    # Examples:
    #
    #   make_local_link("LocalLink") #=> "LocalLink"
    #   make_local_link("/Foo/Bar") #=> "%2FFoo%2FBar"
    #
    # Must ensure that the result is properly URL-escaped. The caller
    # will handle HTML escaping as necessary. HTML links will not be
    # inserted if the function returns nil.
    #
    # Example custom behaviour:
    #
    #   make_local_link("LocalLink") #=> "/LocalLink"
    #   make_local_link("Wikipedia:Bread") #=> "http://en.wikipedia.org/wiki/Bread"
    def make_local_link(link) #:doc:
      no_escape? ? link : escape_url(link)
    end

    # Sanitize a direct url (e.g. http://wikipedia.org/). The default
    # behaviour returns the original link as-is.
    #
    # Must ensure that the result is properly URL-escaped. The caller
    # will handle HTML escaping as necessary. The url is emitted as plain
    # text instead of an HTML link if the function returns nil.
    #
    # Custom versions of this function in inherited classes can
    # implement specific link handling behaviour, such as redirection
    # to intermediate pages (for example, for notifying the user that
    # they are leaving the site).
    def make_direct_link(url) #:doc:
      url
    end

    # Sanitize and prefix image URLs. When an [[Image(...)]] macro is
    # encountered, this function is called to obtain the actual URL of
    # the image. The default behaviour is to return the image link
    # as-is. No image tag is inserted if the function returns nil; the
    # macro text is emitted literally instead.
    #
    # Custom version of the method can be used to sanitize URLs
    # (e.g. remove query-parts), inhibit off-site images, or add a
    # base URL, for example:
    #
    #   def make_image_link(url)
    #     URI.join("http://mywiki.org/images/", url)
    #   end
    def make_image_link(url) #:doc:
      url
    end

    # Create image markup. This method can be overridden to generate
    # custom markup, for example to add additional html attributes or
    # to put divs around the imgs.
    #
    # uri - image location; converted with to_s so URI objects work too.
    # alt - alternative text, or nil to omit the alt attribute.
    def make_image(uri, alt = '')
      src = escape_html(uri.to_s)
      if alt
        "<img src='#{src}' alt='#{escape_html(alt)}'/>"
      else
        "<img src='#{src}'/>"
      end
    end

    # Markup for a heading of the given level (1..6) with escaped text.
    def make_headline(level, text)
      "<h#{level}>#{escape_html(text)}</h#{level}>"
    end

    # Resolve the target of a [[link]]: an absolute URI with an allowed
    # scheme is returned as-is, everything else (including text that is
    # not a valid URI) is handed to make_local_link.
    def make_explicit_link(link)
      begin
        uri = URI.parse(link)
        return uri.to_s if uri.scheme && @allowed_schemes.include?(uri.scheme)
      rescue URI::InvalidURIError
        # not a URI at all: treat it as a local link below
      end
      make_local_link(link)
    end

    # Render inline markup of +str+ into the output buffer.
    def parse_inline(str)
      until str.empty?
        case str
        # raw url, optionally escaped with a leading ~
        when /\A(\~)?((https?|ftps?):\/\/\S+?)(?=([\,.?!:;"'\)]+)?(\s|$))/
          str = $'
          whole, escaped, url = $&, $1, $2
          if escaped
            @out << escape_html(url)
          elsif (uri = make_direct_link(url))
            @out << '<a href="' << escape_html(uri) << '">' << escape_html(url) << '</a>'
          else
            @out << escape_html(whole)
          end
        # [[Image(pic.jpg|tag)]]
        when /\A\[\[Image\(([^|].*?)(\|(.*?))?\)\]\]/
          str = $'
          whole, src, alt = $&, $1, $3
          # Give subclasses the documented chance to rewrite or veto the URL.
          if (uri = make_image_link(src))
            @out << make_image(uri, alt)
          else
            @out << escape_html(whole)
          end
        # [[link]] or [[link|text]]
        when /\A\[\[\s*([^|]*?)\s*(\|\s*(.*?))?\s*\]\]/m
          str = $'
          link, content, whole = $1, $3, $&
          make_link(link, content, whole)
        else
          str = parse_inline_tag(str)
        end
      end
    end

    # Emit markup for an explicit [[link|content]].
    #
    # link    - the link target as written.
    # content - optional link text (inline markup allowed) or nil.
    # whole   - the complete matched markup, emitted literally when the
    #           link resolves to nil.
    def make_link(link, content, whole)
      # special "link" [[BR]]: only the exact word, not e.g. [[Bread]]
      if link =~ /\Abr\z/i
        @out << '<br/>'
        return
      end

      uri = make_explicit_link(link)
      unless uri
        @out << escape_html(whole)
        return
      end

      @out << '<a href="' << escape_html(uri) << '">'
      if content
        content = parse_inline_tag(content) until content.empty?
      else
        @out << escape_html(link)
      end
      @out << '</a>'
    end

    # Consume one inline token from the start of +str+, render it, and
    # return the remaining text. +str+ must not be empty.
    def parse_inline_tag(str)
      case str
      when /\A\{\{\{(.*?\}*)\}\}\}/     # inline pre (tt)
        @out << '<tt>' << escape_html($1) << '</tt>'
      when /\A`(.*?)`/                  # inline pre (tt)
        @out << '<tt>' << escape_html($1) << '</tt>'
      # A run of letters/digits. The run stops in front of a URL scheme so
      # that parse_inline can still auto-link a URL glued to a word.
      when /\A(?:(?!(?:https?|ftps?):\/\/)[[:alnum:]])+/
        @out << $&                      # word
      when /\A\s+/
        @out << ' ' if @out[-1] != ?\s  # spaces
      when /\A'''''/
        toggle_tag 'strongem', $&       # bolditalic
      when /\A\*\*/, /\A'''/
        toggle_tag 'strong', $&         # bold
      when /\A''/, /\A\/\//
        toggle_tag 'em', $&             # italic
      when /\A\\\\/, /\A\[\[br\]\]/i
        @out << '<br/>'                 # newline
      when /\A__/
        toggle_tag 'u', $&              # underline
      when /\A~~/
        toggle_tag 'del', $&            # delete
      when /\A\^/
        toggle_tag 'sup', $&            # ^{}
      when /\A,,/
        toggle_tag 'sub', $&            # _{}
      when /\A\(R\)/i
        @out << '&#174;'                # (R)
      when /\A\(C\)/i
        @out << '&#169;'                # (C)
      when /\A!([^\s])/
        @out << escape_html($1)         # !escaped char
      when /./
        @out << escape_html($&)         # ordinary char
      end
      # Text after the token matched above. The helper calls in the
      # branches do not disturb it: match data is local to each method.
      return $'
    end

    # Render one table row. +str+ is the row text after the leading "||".
    # A cell starting with "=" is a header cell, empty cells extend the
    # colspan of the next cell, the padding around the text selects the
    # alignment, and a trailing "||\" keeps the <tr> open for the next line.
    def parse_table_row(str)
      start_tag('tr') if !@stack.include?('tr')
      colspan = 1
      print_tr = true
      str.scan(/(=?)(\s*)(.*?)\1?($ | \|\|\\\s*$ | \|\| )/x) do
        tdth = $1.empty? ? 'td' : 'th'
        le, txt, tail = $2.size, $3, $4

        # do not end row, continue on next line
        print_tr = false if tail =~ /^\|\|\\/

        if txt.empty? && le == 0
          colspan += 1
          next
        end

        style = ''
        if txt =~ /\S(\s*)$/
          ri = $1.size
          ri += 100 if tail.empty? # never right-align when the last || is omitted
          style = " style='text-align:right'"  if ri == 0 && le >= 1
          style = " style='text-align:center'" if le >= 2 && ri >= 2
        end

        colspan_txt = colspan > 1 ? " colspan='#{colspan}'" : ''
        start_tag(tdth, style + colspan_txt)
        colspan = 1

        parse_inline(txt.strip) if txt
        end_tag while @stack.last != 'tr'
      end
      end_tag if print_tr
    end

    # Remove the single escaping space in front of a "}}}" that starts a
    # line inside a preformatted block.
    def make_nowikiblock(input)
      input.gsub(/^ (?=\}\}\})/, '')
    end

    # Render one list item line.
    #
    # spc_size - indentation (number of leading whitespace characters).
    # bullet   - the list marker as written ("*", "-", "1.", "a.", "i." ...).
    # text     - the item text.
    def parse_li_line(spc_size, bullet, text)
      # leave every list that is indented deeper than this line
      end_tag while !@stacki.empty? && @stacki.last > spc_size

      if @stack.include?('li')
        end_tag while @stack.last != 'li'

        # sibling item: close the previous <li>, and the list itself if
        # the list type (ul/ol) differs
        if @stacki.last == spc_size
          end_tag # li
          ulol_now = (bullet =~ /[*-]/) ? 'ul' : 'ol'
          end_tag if @stack.last != ulol_now # ol | ul
        end
      else
        end_paragraph
      end

      if @stacki.empty? || @stacki.last < spc_size
        marker = bullet.sub(/\.$/, '')
        ulol = (marker =~ /[-*]/) ? 'ul' : 'ol'
        attr = ''
        attr = " type='i'" if marker =~ /i/i
        attr = " type='a'" if marker =~ /a/i
        attr += " start='#{marker}'" if marker =~ /^\d+$/ && marker != '1'
        start_tag(ulol, attr, spc_size)
      end

      start_tag('li')
      parse_inline(text)
    end

    # Open or close <blockquote> tags until exactly +level+ of them are
    # open. When the level is unchanged only a separating space is emitted.
    def blockquote_level_to(level)
      cur_level = @stack.count('blockquote')
      if cur_level == level
        @out << ' '
        return
      end
      while cur_level < level
        cur_level += 1
        start_tag('blockquote')
      end
      while cur_level > level
        cur_level -= 1 if @stack.last == 'blockquote'
        end_tag
      end
    end

    # Render block-level markup of +str+ (the whole document) and return
    # the output buffer. Raises RuntimeError if no block rule matches.
    def parse_block(str)
      until str.empty?
        case str

        # pre {{{ ... }}}
        when /\A\{\{\{\r?\n(.*?)\r?\n\}\}\}/m
          end_paragraph
          nowikiblock = make_nowikiblock($1)
          @out << '<pre>' << escape_html(nowikiblock) << '</pre>'

        # horizontal rule
        when /\A\s*-{4,}\s*$/
          end_paragraph
          @out << '<hr/>'

        # heading == Wiki Rules ==
        when /\A\s*(={1,6})\s*(.*?)\s*=*\s*$(\r?\n)?/
          end_paragraph
          level = $1.size
          @out << make_headline(level, $2)

        # table row
        when /\A[ \t]*\|\|(.*)$(\r?\n)?/
          if !@stack.include?('table')
            end_paragraph
            start_tag('table')
          end
          parse_table_row($1)

        # empty line
        when /\A\s*$(\r?\n)?/
          end_paragraph

        # definition list "term:: definition". The term must sit on one
        # line, so it cannot swallow preceding lines.
        when /\A([\w \t]*)::\s*/
          term = $1
          if @stack.include?('dl')
            # further term of the same list: close the previous <dd>
            end_tag while @stack.last != 'dl'
          else
            end_paragraph
            start_tag('dl')
          end
          start_tag('dt')
          @out << escape_html(term)
          end_tag
          start_tag('dd')

        # li
        when /\A(\s*)([*-]|[aAIi\d]\.)\s+(.*?)$(\r?\n)?/
          parse_li_line($1.size, $2, $3)

        # citation
        when /\A(>[>\s]*)(.*?)$(\r?\n)?/
          level, quote = $1.count('>'), $2

          start_paragraph if !@stack.include? 'p'
          blockquote_level_to(level)
          parse_inline(quote.strip)

        # ordinary line
        when /\A(\s*)(\S+.*?)$(\r?\n)?/
          spc_size, text = $1.size, $2

          if @stack.include?('li') || @stack.include?('dl')
            # dl, li continuation
            parse_inline(' ')
            parse_inline(text)

          elsif spc_size > 0
            # quote continuation
            start_paragraph if !@stack.include? 'p'
            blockquote_level_to(1)
            parse_inline(text)

          else
            # real ordinary line
            start_paragraph
            parse_inline(text)
          end
        else # case str
          raise "Parse error at #{str[0,30].inspect}"
        end
        # Remainder after the block rule matched above. No branch runs
        # another regexp match in this method, so $' is still that rule's.
        str = $'
      end
      end_paragraph
      @out
    end
  end
end