# encoding=utf-8
module Polytexnic
  module Preprocessor
    module Polytex
      include Polytexnic::Literal

      # Converts Markdown to PolyTeX.
      # We adopt a unified approach: rather than convert "Markdown" (I use
      # the term loosely*) directly to HTML, we convert it to PolyTeX and
      # then run everything through the PolyTeX pipeline. Happily, kramdown
      # comes equipped with a `to_latex` method that does most of the heavy
      # lifting. The output isn't as clean as that produced by Pandoc (our
      # previous choice), but it comes with significant advantages: (1) It's
      # written in Ruby, available as a gem, so its use eliminates an external
      # dependency. (2) It's the foundation for the "Markdown" interpreter
      # used by Leanpub, so by using it ourselves we ensure greater
      # compatibility with Leanpub books.
      #
      # * <rant>The number of mutually incompatible markup languages going
      # by the name "Markdown" is truly mind-boggling. Most of them add things
      # to John Gruber's original Markdown language in an ever-expanding
      # attempt to bolt on the functionality needed to write longer documents.
      # At this point, I fear that "Markdown" has become little more than a
      # marketing term.</rant>
      def to_polytex
        # Pipeline: hash everything kramdown must not touch, run kramdown's
        # LaTeX converter, then restore the hashed content. The result is
        # stored in @source, which is also the return value.
        require 'kramdown'
        hashed      = {}  # digest key => protected text
        hashed_math = {}  # [kind, digest key] => math source

        markdown = cache_code_environments
        puts markdown if debug?

        # Each helper edits `markdown` in place, swapping protected spans for
        # digest keys. Math goes last, after raw LaTeX has been hashed.
        convert_code_inclusion(markdown, hashed)
        cache_latex_literal(markdown, hashed)
        cache_raw_latex(markdown, hashed)
        cache_image_locations(markdown, hashed)
        puts markdown if debug?
        cache_math(markdown, hashed_math)
        puts markdown if debug?

        # Override the header ordering, which starts with 'section' by default.
        headers = 'chapter,section,subsection,subsubsection,paragraph,subparagraph'
        document = Kramdown::Document.new(markdown, latex_headers: headers)
        if debug?
          puts document.inspect
          puts document.to_html
          puts document.to_latex
        end

        # Clean up kramdown's LaTeX and put the hashed content back. Math is
        # restored before the general cache, matching the caching order.
        polytex = document.to_latex
        remove_comments(polytex)
        convert_includegraphics(polytex)
        convert_tt(polytex)
        restore_math(polytex, hashed_math)
        restore_hashed_content(polytex, hashed)
        @source = polytex
      end

      # Adds support for <<(path/to/code) inclusion.
      #
      # text  - String of Markdown, modified in place.
      # cache - Hash that receives digest key => "%= <<(path)" entries.
      #
      # Returns text if at least one inclusion was replaced, nil otherwise.
      def convert_code_inclusion(text, cache)
        text.gsub!(/^\s*(<<\(.*?\))/) do
          inclusion = Regexp.last_match(1)
          digest(inclusion).tap do |key|
            # Reduce to a previously solved case by storing the "%= " form.
            cache[key] = "%= #{inclusion}"
          end
        end
      end

      # Caches literal LaTeX environments.
      #
      # markdown - String of Markdown, modified in place.
      # cache    - Hash that receives digest key => full environment text.
      #
      # Every \begin{type}...\end{type} span, for each type listed by
      # Polytexnic::Literal.literal_types, is replaced by its digest key.
      def cache_latex_literal(markdown, cache)
        Polytexnic::Literal.literal_types.each do |environment|
          name = Regexp.escape(environment)
          # /m lets the body of the environment span several lines.
          pattern = /(\\begin\{#{name}\}
                    .*?
                    \\end\{#{name}\})
                    /xm
          markdown.gsub!(pattern) do
            literal_text = Regexp.last_match(1)
            key = digest(literal_text)
            cache[key] = literal_text
            key
          end
        end
      end

      # Caches raw LaTeX commands to be passed through the pipeline.
      #
      # markdown - String of Markdown, modified in place.
      # cache    - Hash that receives digest key => raw LaTeX entries.
      #
      # Returns markdown if anything was replaced, nil otherwise.
      def cache_raw_latex(markdown, cache)
        # Alternatives are tried in order, so the more specific forms (whole
        # command lines, tied references) win over the bare-command fallbacks.
        command_regex = /(
                          ^[ \t]*\\\w+.*\}[ \t]*$ # Command on line with arg
                          |
                          ~\\ref\{.*?\}     # reference with a tie
                          |
                          ~\\eqref\{.*?\}   # eq reference with a tie
                          |
                          \\[^\s]+\{.*?\}   # command with one arg
                          |
                          \\\w+             # normal command
                          |
                          \\-               # hyphenation
                          |
                          \\[ %&$\#@]       # space or special character
                          |
                          \\\\              # double backslashes
                          )
                        /x
        markdown.gsub!(command_regex) do
          command = Regexp.last_match(1)
          puts command.inspect if debug?
          key = digest(command)
          cache[key] = command

          # Pad tables & captions with newlines for kramdown compatibility.
          padded = command =~ /\{table\}|\\caption\{/
          padded ? "\n#{key}\n" : key
        end
      end

      # Caches the locations of images to be passed through the pipeline.
      # This works around a Kramdown bug, which fails to convert images
      # properly when their location includes a URL.
      #
      # text  - String of Markdown, modified in place.
      # cache - Hash that receives digest key => image location entries.
      #
      # Returns text if at least one image was rewritten, nil otherwise.
      def cache_image_locations(text, cache)
        # Matches '![Image caption](/path/to/image)'
        image = /^\s*(!\[.*?\])\((.*?)\)/
        text.gsub!(image) do
          caption, location = Regexp.last_match.captures
          key = digest(location)
          cache[key] = location
          # Leading whitespace is part of the match; the image is re-emitted
          # on a fresh line with the key in place of the location.
          "\n#{caption}(#{key})"
        end
      end

      # Restores raw code from the cache
      #
      # text  - String containing digest keys, modified in place.
      # cache - Hash of digest key => original text.
      #
      # Returns cache.
      def restore_hashed_content(text, cache)
        cache.each do |key, value|
          # The block form of gsub! inserts its result verbatim, so the
          # backslashes in hashed LaTeX need no extra escaping.
          text.gsub!(key) { value }
        end
      end

      # Caches Markdown code environments.
      # Included are indented environments, Leanpub-style indented environments,
      # and GitHub-style code fencing.
      #
      # Reads @source and returns a new String; @source itself is not changed.
      # Highlighted code is stored in code_cache under a digest key and that
      # key is emitted in its place. Basic fenced code is re-emitted as an
      # indented block.
      def cache_code_environments
        indent    = ' ' * 4
        fence_end = /^```\s*$/
        pending   = @source.split("\n")
        result    = []
        until (current = pending.shift).nil?
          case current
          when /\{lang="(.*?)"\}/             # Leanpub-style language tag
            lang = Regexp.last_match(1)
            snippet = []
            # Collect the indented lines that follow, minus the indentation.
            while (current = pending.shift) &&
                  (indented = current.match(/^#{indent}(.*)$/))
              snippet << indented[1]
            end
            source = snippet.join("\n")
            key = digest(source)
            code_cache[key] = [source, lang]
            # `current` is now the line that ended the code (nil at the end
            # of input); it is passed through unprocessed after the key.
            result.push(key, current)
          when fence_end                      # basic code fences
            while (current = pending.shift) && current !~ fence_end
              result << indent + current
            end
            result << "\n"
          when /^```(\w+)(,\s*options:.*)?$/  # highlighted fences
            lang, options = Regexp.last_match.captures
            snippet = []
            while (current = pending.shift) && current !~ fence_end
              snippet << current
            end
            # The `false` slot is kept exactly as in the cached data layout.
            data = [snippet.join("\n"), lang, false, options]
            key = digest(data.join("--"))
            code_cache[key] = data
            result << key
          else
            result << current
          end
        end
        result.join("\n")
      end

      # Converts \includegraphics to \image.
      # The reason is that raw \includegraphics is almost always too wide
      # in the PDF. Instead, we use the custom-defined \image command, which
      # is specifically designed to fix this issue.
      #
      # text - String of LaTeX, modified in place.
      #
      # Returns text if a replacement was made, nil otherwise.
      def convert_includegraphics(text)
        # Plain-string pattern: every literal occurrence is swapped.
        text.gsub!('\includegraphics') { '\image' }
      end

      # Converts {\tt ...} to \kode{...}
      # This effectively converts `inline code`, which kramdown sets as
      # {\tt inline code}, to PolyTeX's native \kode command, which in
      # turn allows inline code to be separately styled.
      #
      # text - String of LaTeX, modified in place.
      #
      # Returns text if a replacement was made, nil otherwise.
      def convert_tt(text)
        text.gsub!(/\{\\tt (.*?)\}/) do
          # The captured code is inserted verbatim inside \kode{...}.
          "\\kode{#{Regexp.last_match(1)}}"
        end
      end

      # Caches math.
      # Leanpub uses the notation {$$}...{/$$} for both inline and block math,
      # with the only difference being the presence of newlines:
      #     {$$} x^2 {/$$}  % inline
      # and
      #     {$$}
      #     x^2             % block
      #     {/$$}
      # I personally hate this notation and convention, so we also support
      # LaTeX-style \( x \) and \[ x^2 - 2 = 0 \] notation.
      #
      # text  - String of Markdown, modified in place.
      # cache - Hash that receives [:block, key] => math and
      #         [:inline, key] => math entries.
      #
      # Both patterns use /m so that `.` matches newlines: block math may
      # contain several lines, and inline math may be wrapped across source
      # lines. Without it such math is never cached.
      #
      # Returns text if any inline math was replaced, nil otherwise.
      def cache_math(text, cache)
        # Block math goes first so that {$$}\n...\n{/$$} is never picked up
        # by the inline pattern. The newlines just inside the delimiters are
        # left out of the cached value for both notations.
        block_math = /(?:\{\$\$\}\n(.*?)\n\{\/\$\$\}|\\\[\n?(.*?)\n?\\\])/m
        text.gsub!(block_math) do
          match = Regexp.last_match
          math = match[1] || match[2]
          key = digest(math)
          cache[[:block, key]] = math
          key
        end

        inline_math = /(?:\{\$\$\}(.*?)\{\/\$\$\}|\\\((.*?)\\\))/m
        text.gsub!(inline_math) do
          match = Regexp.last_match
          math = match[1] || match[2]
          key = digest(math)
          cache[[:inline, key]] = math
          key
        end
      end

      # Restores the Markdown math.
      # This is easy because we're running everything through our LaTeX
      # pipeline.
      #
      # text  - String containing math digest keys, modified in place.
      # cache - Hash of [kind, key] => math source, where kind is :inline
      #         or :block.
      #
      # Inline math is wrapped in \( ... \); block math is wrapped in
      # \[ ... \] with a newline inside each delimiter.
      #
      # Returns cache.
      def restore_math(text, cache)
        cache.each do |(kind, key), value|
          case kind
          when :inline
            open  = '\('
            close = '\)'
          when :block
            open  = '\[' + "\n"
            close = "\n" + '\]'
          end
          math = open + value + close
          # Use the block form of gsub!: a replacement *string* would have
          # its backslash sequences interpreted (e.g. "\\\\" collapsing to a
          # single backslash, "\\0" expanding to the match), corrupting the
          # math. The block's result is inserted verbatim.
          text.gsub!(key) { math }
        end
      end
    end
  end
end