module Polytexnic
  module Literal
    extend self

    # Matches the line for syntax highlighting.
    # %= lang: <language>[, options: ...]
    # Group 1 captures the language name; group 2 (optional) captures the
    # raw text following `options:`.
    LANG_REGEX = /^\s*%=\s+lang:\s*(\w+)(?:,\s*options:(.*))?/

    # Matches the line for code inclusion.
    # %= <
    #
    # NOTE(review): source text appears to have been lost at this point.
    # Whatever this comment introduces, together with the opening of the
    # method that the statements below belong to (its `def`, its loop and
    # the leading `if` branches), is not present, so the `elsif`, `else` and
    # trailing `end` keywords below have no visible opener. Restore the
    # missing text from version control before relying on this section.
    #
          # \end{code}
          # and then prepend the code to the current `lines` array.
          filename, custom_language, highlight_options = $1, $2, $3
          # The file extension (without the leading dot) serves as the
          # fallback language when no custom language was captured.
          extension_array = File.extname(filename).scan(/\.(.*)/).first
          lang_from_extension = extension_array.nil? ? nil : extension_array[0]
          if File.exist?(filename)
            language = custom_language || lang_from_extension || 'text'
            # Build a `%= lang:` directive followed by a `code` environment
            # wrapping the file's lines, and push it onto the front of
            # `lines` so it is picked up by subsequent iterations.
            code = ["%= lang:#{language}#{highlight_options}"]
            code << '\begin{code}'
            code.concat(File.read(filename).split("\n"))
            code << '\end{code}'
            lines.unshift(*code)
          else
            # A missing file becomes a visible inline error, not an exception.
            lines.unshift("\\verb+ERROR: File '#{filename}' does not exist+")
          end
        elsif line.begin_literal?
          in_verbatim = true
          literal_type = line.literal_type
          skip = line.math_environment? || latex
          if line.math_environment? && !latex
            output << '\begin{xmlelement*}{equation}'
            output << '\begin{equation}'
          end
          math = line.math_environment?
          label = nil
          output << xmlelement(element(literal_type), skip) do
            # `count` tracks nesting depth of environments of the same type
            # so that only the matching \end{...} terminates the literal.
            count = 1
            text = []
            text << line if line.math_environment? || (latex && !language)
            while (line = lines.shift)
              if line.begin_literal?(literal_type)
                count += 1
              elsif line.end_literal?(literal_type)
                count -= 1
                if count.zero?
                  in_verbatim = false
                  text << line if line.math_environment? || (latex && !language)
                  break
                end
              end
              # Remember a \label line found inside a math environment.
              label = line if math && line =~ /^\s*\\label{.*?}\s*$/
              text << line
            end
            # NOTE(review): when `count` is nonzero here the loop above ended
            # because `lines.shift` returned nil, so `line` looks to be nil
            # and `line.literal_type` would raise NoMethodError instead of
            # producing this message -- confirm.
            raise "Missing \\end{#{line.literal_type}}" if count != 0
            content = text.join("\n")
            if math
              key = digest(content)
              literal_cache[key] = content
            elsif language.nil?
              key = digest(content)
              literal_cache[key] = content
              tag = 'literal'
            else
              # Highlighted code: the cache key covers everything that is
              # stored or that selects the output format.
              format = latex ? 'latex' : 'html'
              id = "#{content}--#{language}--#{format}--#{in_codelisting}--#{highlight_options}"
              key = digest(id, salt: code_salt)
              code_cache[key] = [content, language, in_codelisting, highlight_options]
              tag = 'code'
            end
            # The block's value is either the bare key or the key wrapped in
            # an element named after `tag`.
            if latex || tag == 'code' || math
              key
            else
              xmlelement(tag) { key }
            end
          end
          if math && !latex
            unless label.nil?
              key = digest(label)
              math_label_cache[key] = label
              output << key
            end
            output << '\end{equation}'
            unless label.nil?
              # Take the text inside the label's braces, replacing ':' with
              # '-' and '_' with `underscore_digest`.
              string = label.scan(/\{(.*?)\}/).flatten.first
              string = string.gsub(':', '-').gsub('_', underscore_digest)
              output << "\\xbox{data-label}{#{string}}"
            end
            output << '\end{xmlelement*}'
          end
          # The language setting applies to a single environment only.
          language = nil
          (output << '') unless latex # Force the next element to be a paragraph
        else
          output << line
        end
      end
    end

    # Returns a permanent salt for the syntax highlighting cache.
    # The value is passed as the `salt:` option to `digest` when computing
    # keys for `code_cache`.
    def code_salt
      'fbbc13ed4a51e27608037365e1d27a5f992b6339'
    end

    # Caches both display and inline math.
    # Display math is processed first, then inline math. `output` is
    # modified in place and is also the return value.
    def cache_display_inline_math(output)
      output.tap do
        cache_display_math(output)
        cache_inline_math(output)
      end
    end

    # Caches display math.
    # We support both TeX-style $$...$$ and LaTeX-style \[ ... \].
    # Either form is rewritten as \[ ... \] and replaced in place by the
    # result of `equation_element`. The /m flag lets the math span lines.
    # The return value is that of `gsub!` (nil when nothing was replaced).
    def cache_display_math(output)
      output.gsub!(/\\\[(.*?)\\\]|\$\$(.*?)\$\$/m) do
        math = "\\[ #{$1 || $2} \\]"
        equation_element(math)
      end
    end

    # Returns an equation element while caching the given content.
    # We use this only for unnumbered, display equations, which requires using
    # the `equation*` environment in place of `equation`.
    # The content is stored in `literal_cache` under its digest, and only
    # that digest appears in the returned string.
    def equation_element(content)
      key = digest(content)
      literal_cache[key] = content
      "\\begin{xmlelement*}{equation} \\begin{equation*} #{key} \\end{equation*} \\end{xmlelement*}"
    end

    # Caches inline math.
    # We support both TeX-style $...$ and LaTeX-style \( ... \).
    # There's an annoying edge case involving literal dollar signs, as in \$.
    # Handling it significantly complicates the regex, and necessitates
    # introducing an additional group to catch the character before the math
    # dollar sign in $2 and prepend it to the inline math element.
def cache_inline_math(output) output.gsub!(/(?:\\\((.*?)\\\)|([^\\]|^)\$(.*?[^\\])\$)/m) do math = "\\( #{$1 || $3} \\)" key = digest(math) literal_cache[key] = math $2.to_s + xmlelement('inline') { key } end end # Converts references to hyperrefs. # We want to convert # Chapter~\ref{cha:foo} # to # \hyperref[cha:foo]{Chapter~\ref{cha:foo} # which is then handled by LaTeX's hyperref package # or by Tralics (where it converted to a link # by the postprocessor). # For completeness, we handle the case where the author neglects to # use the nonbreak space ~. def hyperrefs(string) linked_item = "(Chapter|Section|Table|Box|Figure|Fig\.|Listing" + "|Equation|Eq\.)" ref = /(?:#{linked_item}(~| ))*(\\(?:eq)*ref){(.*?)}/i string.gsub!(ref) do "\\hyperref[#{$4}]{#{$1}#{$2}#{$3}{#{$4}}}" end end # Handles non-ASCII Unicode characters. # The Tralics part of the pipeline doesn't properly handle Unicode, # which is odd since Tralics is a French project. Nevertheless, # we can hack around the restriction by treating non-ASCII Unicode # characters as literal elements and simply pass them through the # pipeline intact. def cache_unicode(string) non_ascii_unicode = /([^\x00-\x7F]+)/ string.gsub(non_ascii_unicode) do key = digest($1) literal_cache[key] = $1 xmlelement('unicode') { key } end end def element(literal_type) if math_environments.include?(literal_type) 'equation' else literal_type end end end end class String include Polytexnic::Literal # Returns true if self matches \begin{...} where ... is a literal environment. # Note: Support for the 'metacode' environment exists solely to allow # meta-discussion of the 'code' environment. def begin_literal?(literal_type = nil) return false unless include?('\begin') literal_type ||= "(?:verbatim|Verbatim|code|metacode|" + "#{math_environment_regex})" match(/^\s*\\begin{#{literal_type}}\s*$/) end # Returns true if self matches \end{...} where ... is a literal environment. 
def end_literal?(literal_type)
    # `literal_type` is escaped so that names such as 'equation*' are
    # compared literally. Yields false when the line has no '\end' at all,
    # otherwise the result of `match` (MatchData or nil).
    include?('\end') &&
      match(/^\s*\\end{#{Regexp.escape(literal_type)}}\s*$/)
  end

  # Names the literal environment opened on this line.
  #   '\begin{verbatim}' => 'verbatim'
  #   '\begin{equation}' => 'equation'
  #   '\['               => 'display'
  # A line without any \begin{...} is reported as 'display'.
  def literal_type
    environment = self[/\\begin{(.*?)}/, 1]
    environment || 'display'
  end

  # Tells whether this line consists solely of a \begin{...} for one of the
  # supported math environments (surrounding whitespace is allowed).
  def begin_math?
    include?('\begin') &&
      match(/^\s*\\begin{#{"(?:#{math_environment_regex})"}}\s*$/)
  end

  # Tells whether the name of a supported math environment occurs anywhere
  # in this line.
  def math_environment?
    alternatives = math_environment_regex
    match(/(?:#{alternatives})/)
  end

  private

    # Builds the regex source for the supported math environments: the
    # escaped environment names joined into a single alternation.
    def math_environment_regex
      escaped_names = Polytexnic::Literal.math_environments.map do |name|
        Regexp.escape(name)
      end
      escaped_names.join('|')
    end
end