lib/polytexnic/preprocessors/polytex.rb in polytexnic-0.7.7 vs lib/polytexnic/preprocessors/polytex.rb in polytexnic-0.8.0

- old
+ new

@@ -1,6 +1,67 @@ # encoding=utf-8 + +require 'kramdown' +require 'securerandom' + +$cache = {} +$label_salt = SecureRandom.hex + +module Kramdown + module Converter + class Latex < Base + + # Converts `inline codespan`. + # This overrides kramdown's default to use `\kode` instead of `\tt`. + def convert_codespan(el, opts) + "\\kode{#{latex_link_target(el)}#{escape(el.value)}}" + end + + # Overrides default convert_a. + # Unfortunately, kramdown is too aggressive in escaping characters + # in hrefs, converting + # [foo bar](http://example.com/foo%20bar) + # into + # \href{http://example.com/foo\%20bar}{foo bar} + # The '\%20' in the href then won't work properly. + def convert_a(el, opts) + url = el.attr['href'] + if url =~ /^#/ + "\\hyperlink{#{escape(url[1..-1])}}{#{inner(el, opts)}}" + else + "\\href{#{url}}{#{inner(el, opts)}}" + end + end + + alias_method :original_convert_standalone_image, :convert_standalone_image + + # Uses figures for images only when label is present. + # This allows users to put raw (centered) images in their documents. + # The default behavior of kramdown is to wrap such images in a figure + # environment, which causes LaTeX to (a) treat them as floats and (b) + # include a caption. This may not be what the user wants, and it's also + # nonstandard Markdown. On the other hand, it is really nice to be + # able to include captions using the default image syntax, so as a + # compromise we use Markdown behavior by default and kramdown behavior + # if the alt text contains a '\label' element. + def convert_standalone_image(el, opts, img) + alt_text = el.children.first.attr['alt'] + if has_label?(alt_text) + original_convert_standalone_image(el, opts, img) + else + img.gsub('\includegraphics', '\image') + "\n" + end + end + + # Detects if text has a label. + def has_label?(text) + text.include?($label_salt) + end + end + end +end + module Polytexnic module Preprocessor module Polytex include Polytexnic::Literal @@ -21,20 +82,18 @@ # to John Gruber's original Markdown language in an ever-expanding # attempt to bolt on the functionality needed to write longer documents. # At this point, I fear that "Markdown" has become little more than a # marketing term.</rant> def to_polytex - require 'kramdown' - cache = {} math_cache = {} cleaned_markdown = cache_code_environments(@source) puts cleaned_markdown if debug? cleaned_markdown.tap do |markdown| - convert_code_inclusion(markdown, cache) - cache_latex_literal(markdown, cache) - cache_raw_latex(markdown, cache) - cache_image_locations(markdown, cache) + convert_code_inclusion(markdown) + cache_latex_literal(markdown) + cache_raw_latex(markdown) + cache_image_locations(markdown) puts markdown if debug? cache_math(markdown, math_cache) end puts cleaned_markdown if debug? # Override the header ordering, which starts with 'section' by default. @@ -42,46 +101,46 @@ kramdown = Kramdown::Document.new(cleaned_markdown, latex_headers: lh) puts kramdown.inspect if debug? puts kramdown.to_html if debug? puts kramdown.to_latex if debug? @source = kramdown.to_latex.tap do |polytex| - remove_comments(polytex) + remove_kramdown_comments(polytex) convert_includegraphics(polytex) - convert_tt(polytex) restore_math(polytex, math_cache) - restore_hashed_content(polytex, cache) + restore_hashed_content(polytex) end end # Adds support for <<(path/to/code) inclusion. - def convert_code_inclusion(text, cache) + def convert_code_inclusion(text) text.gsub!(/^\s*(<<\(.*?\))/) do key = digest($1) - cache[key] = "%= #{$1}" # reduce to a previously solved case + $cache[key] = "%= #{$1}" # reduce to a previously solved case key end end # Caches literal LaTeX environments. - def cache_latex_literal(markdown, cache) + def cache_latex_literal(markdown) # Add tabular and tabularx support. literal_types = Polytexnic::Literal.literal_types + %w[tabular tabularx] literal_types.each do |literal| regex = /(\\begin\{#{Regexp.escape(literal)}\} .*? \\end\{#{Regexp.escape(literal)}\}) /xm markdown.gsub!(regex) do - key = digest($1) - cache[key] = $1 + content = $1 + key = digest(content) + $cache[key] = content key end end end # Caches raw LaTeX commands to be passed through the pipeline. - def cache_raw_latex(markdown, cache) + def cache_raw_latex(markdown) command_regex = /( ^[ \t]*\\\w+.*\}[ \t]*$ # Command on line with arg | ~\\ref\{.*?\} # reference with a tie | @@ -100,11 +159,13 @@ /x markdown.gsub!(command_regex) do content = $1 puts content.inspect if debug? key = digest(content) - cache[key] = content + # Used to speed up has_label? in convert_standalone_image. + key += $label_salt if content.include?('\label') + $cache[key] = content if content =~ /\{table\}|\\caption\{/ # Pad tables & captions with newlines for kramdown compatibility. "\n#{key}\n" else @@ -114,22 +175,22 @@ end # Caches the locations of images to be passed through the pipeline. # This works around a Kramdown bug, which fails to convert images # properly when their location includes a URL. - def cache_image_locations(text, cache) + def cache_image_locations(text) # Matches '![Image caption](/path/to/image)' text.gsub!(/^\s*(!\[.*?\])\((.*?)\)/) do key = digest($2) - cache[key] = $2 + $cache[key] = $2 "\n#{$1}(#{key})" end end - # Restores raw code from the cache - def restore_hashed_content(text, cache) - cache.each do |key, value| + # Restores raw code from the cache. + def restore_hashed_content(text) + $cache.each do |key, value| # Because of the way backslashes get interpolated, we need to add # some extra ones to cover all the cases of hashed LaTeX. text.gsub!(key, value.gsub(/\\/, '\\\\\\')) end end @@ -175,26 +236,35 @@ end end output.join("\n") end - # Converts \includegraphics to \image. + # Removes comments produced by kramdown. + # These have the special form of always being at the beginning of the + # line. + def remove_kramdown_comments(text) + text.gsub!(/^% (.*)$/, '') + end + + # Converts \includegraphics to \image inside figures. # The reason is that raw \includegraphics is almost always too wide # in the PDF. Instead, we use the custom-defined \image command, which # is specifically designed to fix this issue. def convert_includegraphics(text) - text.gsub!('\includegraphics', '\image') + in_figure = false + newtext = text.split("\n").map do |line| + line.gsub!('\includegraphics', '\image') if in_figure + if line =~ /^\s*\\begin\{figure\}/ + in_figure = true + elsif line =~ /^\s*\\end\{figure\}/ + in_figure = false + end + line + end.join("\n") + text.replace(newtext) end - # Converts {tt ...} to \kode{...} - # This effectively converts `inline code`, which kramdown sets as - # {\tt inline code}, to PolyTeX's native \kode command, which in - # turns allows inline code to be separately styled. - def convert_tt(text) - text.gsub!(/\{\\tt (.*?)\}/, '\kode{\1}') - end - # Caches math. # Leanpub uses the notation {$$}...{/$$} for both inline and block math, # with the only difference being the presences of newlines: # {$$} x^2 {/$$} % inline # and @@ -202,16 +272,16 @@ # x^2 % block # {/$$} # I personally hate this notation and convention, so we also support # LaTeX-style \( x \) and \[ x^2 - 2 = 0 \] notation. def cache_math(text, cache) - text.gsub!(/(?:\{\$\$\}\n(.*?)\n\{\/\$\$\}|\\\[(.*?)\\\])/) do + text.gsub!(/(?:\{\$\$\}\n(.*?)\n\{\/\$\$\}|\\\[(.*?)\\\])/m) do key = digest($1 || $2) cache[[:block, key]] = $1 || $2 key end - text.gsub!(/(?:\{\$\$\}(.*?)\{\/\$\$\}|\\\((.*?)\\\))/) do + text.gsub!(/(?:\{\$\$\}(.*?)\{\/\$\$\}|\\\((.*?)\\\))/m) do key = digest($1 || $2) cache[[:inline, key]] = $1 || $2 key end end @@ -224,11 +294,11 @@ case kind when :inline open = '\(' close = '\)' when :block - open = '\[' + "\n" - close = "\n" + '\]' + open = '\[' + close = '\]' end text.gsub!(key, open + value + close) end end end