polytex.rb in polytexnic-0.8.0

- old
+ new

@@ -1,6 +1,67 @@
 # encoding=utf-8
+
+require 'kramdown'
+require 'securerandom'
+
+$cache = {}
+$label_salt = SecureRandom.hex
+
+module Kramdown
+  module Converter
+    class Latex < Base
+
+      # Converts `inline codespan`.
+      # This overrides kramdown's default to use `\kode` instead of `\tt`.
+      def convert_codespan(el, opts)
+        "\\kode{#{latex_link_target(el)}#{escape(el.value)}}"
+      end
+
+      # Overrides default convert_a.
+      # Unfortunately, kramdown is too aggressive in escaping characters
+      # in hrefs, converting
+      #     [foo bar](http://example.com/foo%20bar)
+      # into
+      #     \href{http://example.com/foo\%20bar}{foo bar}
+      # The '\%20' in the href then won't work properly.
+      def convert_a(el, opts)
+        url = el.attr['href']
+        if url =~ /^#/
+          "\\hyperlink{#{escape(url[1..-1])}}{#{inner(el, opts)}}"
+        else
+          "\\href{#{url}}{#{inner(el, opts)}}"
+        end
+      end
+
+      alias_method :original_convert_standalone_image, :convert_standalone_image
+
+      # Uses figures for images only when label is present.
+      # This allows users to put raw (centered) images in their documents.
+      # The default behavior of kramdown is to wrap such images in a figure
+      # environment, which causes LaTeX to (a) treat them as floats and (b)
+      # include a caption. This may not be what the user wants, and it's also
+      # nonstandard Markdown. On the other hand, it is really nice to be
+      # able to include captions using the default image syntax, so as a
+      # compromise we use Markdown behavior by default and kramdown behavior
+      # if the alt text contains a '\label' element.
+      def convert_standalone_image(el, opts, img)
+        alt_text = el.children.first.attr['alt']
+        if has_label?(alt_text)
+          original_convert_standalone_image(el, opts, img)
+        else
+          img.gsub('\includegraphics', '\image') + "\n"
+        end
+      end
+
+      # Detects if text has a label (marked by the presence of $label_salt).
+      def has_label?(text)
+        text.include?($label_salt)
+      end
+    end
+  end
+end
+
 module Polytexnic
   module Preprocessor
     module Polytex
       include Polytexnic::Literal
 
@@ -21,20 +82,18 @@
       # to John Gruber's original Markdown language in an ever-expanding
       # attempt to bolt on the functionality needed to write longer documents.
       # At this point, I fear that "Markdown" has become little more than a
       # marketing term.</rant>
       def to_polytex
-        require 'kramdown'
-        cache = {}
         math_cache = {}
         cleaned_markdown = cache_code_environments(@source)
         puts cleaned_markdown if debug?
         cleaned_markdown.tap do |markdown|
-          convert_code_inclusion(markdown, cache)
-          cache_latex_literal(markdown, cache)
-          cache_raw_latex(markdown, cache)
-          cache_image_locations(markdown, cache)
+          convert_code_inclusion(markdown)
+          cache_latex_literal(markdown)
+          cache_raw_latex(markdown)
+          cache_image_locations(markdown)
           puts markdown if debug?
           cache_math(markdown, math_cache)
         end
         puts cleaned_markdown if debug?
         # Override the header ordering, which starts with 'section' by default.
@@ -42,46 +101,46 @@
         kramdown = Kramdown::Document.new(cleaned_markdown, latex_headers: lh)
         puts kramdown.inspect if debug?
         puts kramdown.to_html if debug?
         puts kramdown.to_latex if debug?
         @source = kramdown.to_latex.tap do |polytex|
-                    remove_comments(polytex)
+                    remove_kramdown_comments(polytex)
                     convert_includegraphics(polytex)
-                    convert_tt(polytex)
                     restore_math(polytex, math_cache)
-                    restore_hashed_content(polytex, cache)
+                    restore_hashed_content(polytex)
                   end
       end
 
       # Adds support for <<(path/to/code) inclusion.
-      def convert_code_inclusion(text, cache)
+      def convert_code_inclusion(text)
         text.gsub!(/^\s*(<<\(.*?\))/) do
           key = digest($1)
-          cache[key] = "%= #{$1}"  # reduce to a previously solved case
+          $cache[key] = "%= #{$1}"  # reduce to a previously solved case
           key
         end
       end
 
       # Caches literal LaTeX environments.
-      def cache_latex_literal(markdown, cache)
+      def cache_latex_literal(markdown)
         # Add tabular and tabularx support.
         literal_types = Polytexnic::Literal.literal_types + %w[tabular tabularx]
         literal_types.each do |literal|
           regex = /(\\begin\{#{Regexp.escape(literal)}\}
                   .*?
                   \\end\{#{Regexp.escape(literal)}\})
                   /xm
           markdown.gsub!(regex) do
-            key = digest($1)
-            cache[key] = $1
+            content = $1
+            key = digest(content)
+            $cache[key] = content
             key
           end
         end
       end
 
       # Caches raw LaTeX commands to be passed through the pipeline.
-      def cache_raw_latex(markdown, cache)
+      def cache_raw_latex(markdown)
         command_regex = /(
                           ^[ \t]*\\\w+.*\}[ \t]*$ # Command on line with arg
                           |
                           ~\\ref\{.*?\}     # reference with a tie
                           |
@@ -100,11 +159,13 @@
                         /x
         markdown.gsub!(command_regex) do
           content = $1
           puts content.inspect if debug?
           key = digest(content)
-          cache[key] = content
+          # Salted key speeds up has_label? in convert_standalone_image.
+          key += $label_salt if content.include?('\label')
+          $cache[key] = content
 
           if content =~ /\{table\}|\\caption\{/
             # Pad tables & captions with newlines for kramdown compatibility.
             "\n#{key}\n"
           else
@@ -114,22 +175,22 @@
       end
 
       # Caches the locations of images to be passed through the pipeline.
       # This works around a Kramdown bug, which fails to convert images
       # properly when their location includes a URL.
-      def cache_image_locations(text, cache)
+      def cache_image_locations(text)
         # Matches '![Image caption](/path/to/image)'
         text.gsub!(/^\s*(!\[.*?\])\((.*?)\)/) do
           key = digest($2)
-          cache[key] = $2
+          $cache[key] = $2
           "\n#{$1}(#{key})"
         end
       end
 
-      # Restores raw code from the cache
-      def restore_hashed_content(text, cache)
-        cache.each do |key, value|
+      # Restores raw code from the cache.
+      def restore_hashed_content(text)
+        $cache.each do |key, value|
           # Because of the way backslashes get interpolated, we need to add
           # some extra ones to cover all the cases of hashed LaTeX.
           text.gsub!(key, value.gsub(/\\/, '\\\\\\'))
         end
       end
@@ -175,26 +236,35 @@
           end
         end
         output.join("\n")
       end
 
-      # Converts \includegraphics to \image.
+      # Removes comments produced by kramdown.
+      # These are recognizable because they always start at the beginning of
+      # a line, with a percent sign followed by a space.
+      def remove_kramdown_comments(text)
+        text.gsub!(/^% (.*)$/, '')
+      end
+
+      # Converts \includegraphics to \image inside figures.
       # The reason is that raw \includegraphics is almost always too wide
       # in the PDF. Instead, we use the custom-defined \image command, which
       # is specifically designed to fix this issue.
       def convert_includegraphics(text)
-        text.gsub!('\includegraphics', '\image')
+        in_figure = false
+        newtext = text.split("\n").map do |line|
+          line.gsub!('\includegraphics', '\image') if in_figure
+          if line =~ /^\s*\\begin\{figure\}/
+            in_figure = true
+          elsif line =~ /^\s*\\end\{figure\}/
+            in_figure = false
+          end
+          line
+        end.join("\n")
+        text.replace(newtext)
       end
 
-      # Converts {tt ...} to \kode{...}
-      # This effectively converts `inline code`, which kramdown sets as
-      # {\tt inline code}, to PolyTeX's native \kode command, which in
-      # turns allows inline code to be separately styled.
-      def convert_tt(text)
-        text.gsub!(/\{\\tt (.*?)\}/, '\kode{\1}')
-      end
-
       # Caches math.
       # Leanpub uses the notation {$$}...{/$$} for both inline and block math,
       # with the only difference being the presences of newlines:
       #     {$$} x^2 {/$$}  % inline
       # and
@@ -202,16 +272,16 @@
       #     x^2             % block
       #     {/$$}
       # I personally hate this notation and convention, so we also support
       # LaTeX-style \( x \) and \[ x^2 - 2 = 0 \] notation.
       def cache_math(text, cache)
-        text.gsub!(/(?:\{\$\$\}\n(.*?)\n\{\/\$\$\}|\\\[(.*?)\\\])/) do
+        text.gsub!(/(?:\{\$\$\}\n(.*?)\n\{\/\$\$\}|\\\[(.*?)\\\])/m) do
           key = digest($1 || $2)
           cache[[:block, key]] = $1 || $2
           key
         end
-        text.gsub!(/(?:\{\$\$\}(.*?)\{\/\$\$\}|\\\((.*?)\\\))/) do
+        text.gsub!(/(?:\{\$\$\}(.*?)\{\/\$\$\}|\\\((.*?)\\\))/m) do
           key = digest($1 || $2)
           cache[[:inline, key]] = $1 || $2
           key
         end
       end
@@ -224,11 +294,11 @@
           case kind
           when :inline
             open  = '\('
             close =  '\)'
           when :block
-            open  = '\[' + "\n"
-            close = "\n" + '\]'
+            open  = '\['
+            close = '\]'
           end
           text.gsub!(key, open + value + close)
         end
       end
     end