module Polytexnic module Literal extend self # Matches the line for syntax highlighting. # %= lang: [, options: ...] LANG_REGEX = /^\s*%=\s+lang:\s*(\w+)(?:,\s*options:(.*))?/ # Matches the line for code inclusion. # %= < # \end{code} # and then prepend the code to the current `lines` array. filename, sectionname, custom_language, highlight_options = $1, $2, $3, $4 if filename lines.unshift(*include_code(filename, sectionname, custom_language, highlight_options)) end elsif line.begin_literal? in_verbatim = true literal_type = line.literal_type skip = line.math_environment? || latex if line.math_environment? && !latex output << '\begin{xmlelement*}{equation}' output << '\begin{equation}' end math = line.math_environment? label = nil output << xmlelement(element(literal_type), skip) do count = 1 text = [] text << line if line.math_environment? || (latex && !language) while (line = lines.shift) if line.begin_literal?(literal_type) count += 1 elsif line.end_literal?(literal_type) count -= 1 if count.zero? in_verbatim = false text << line if line.math_environment? || (latex && !language) break end end label = line if math && line =~ /^\s*\\label{.*?}\s*$/ text << line end raise "Missing \\end{#{line.literal_type}}" if count != 0 content = text.join("\n") if math key = digest(content) literal_cache[key] = content elsif language.nil? key = digest(content) literal_cache[key] = content tag = 'literal' else format = latex ? 'latex' : 'html' id = "#{content}--#{language}--#{format}--#{in_codelisting}--#{highlight_options}" key = digest(id, salt: code_salt) code_cache[key] = [content, language, in_codelisting, highlight_options] tag = 'code' end if latex || tag == 'code' || math key else xmlelement(tag) { key } end end if math && !latex unless label.nil? key = digest(label) math_label_cache[key] = label output << key end output << '\end{equation}' unless label.nil? 
string = label.scan(/\{(.*?)\}/).flatten.first string = string.gsub(':', '-').gsub('_', underscore_digest) output << "\\xbox{data-label}{#{string}}" end output << '\end{xmlelement*}' end language = nil (output << '') unless latex # Force the next element to be a paragraph else output << line end end end # Returns the marked up file or section to be included, # or an error message if file or section does not exist. def include_code(filename, sectionname, custom_language, highlight_options) reader = (sectionname ? IncludedSectionReader : IncludedFileReader).new lang = "#{code_language(filename, custom_language)}#{highlight_options}" code = ["%= lang:#{lang}"] code << '\begin{code}' code.concat(reader.read(filename, sectionname)) code << '\end{code}' rescue FileNotFound => e code_error("File '#{e.message}' does not exist") rescue SectionNotFound => e msg = e.message err = "Could not find section header '#{msg}' in file '#{filename}'" code_error(err) end def code_error(details) "\\verb+ERROR: #{details}+" end def code_language(filename, custom_language) extension_array = File.extname(filename).scan(/\.(.*)/).first lang_from_extension = extension_array.nil? ? nil : extension_array[0] language = custom_language || lang_from_extension || 'text' end # Returns a permanent salt for the syntax highlighting cache. def code_salt 'fbbc13ed4a51e27608037365e1d27a5f992b6339' end # Caches both display and inline math. def cache_display_inline_math(output) output.tap do cache_display_math(output) cache_inline_math(output) end end # Caches display math. # We support both TeX-style $$...$$ and LaTeX-style \[ ... \]. def cache_display_math(output) output.gsub!(/\\\[(.*?)\\\]|\$\$(.*?)\$\$/m) do math = "\\[ #{$1 || $2} \\]" equation_element(math) end end # Returns an equation element while caching the given content. # We use this only for unnumbered, display equations, which requires using # the `equation*` environment in place of `equation`. 
def equation_element(content)
  # The content is stored under its digest; only the digest key is embedded
  # in the returned markup.
  key = digest(content)
  literal_cache[key] = content
  "\\begin{xmlelement*}{equation} \\begin{equation*} #{key} \\end{equation*} \\end{xmlelement*}"
end

# Caches inline math.
# We support both TeX-style $...$ and LaTeX-style \( ... \).
# There's an annoying edge case involving literal dollar signs, as in \$.
# Handling it significantly complicates the regex, and necessitates
# introducing an additional group to catch the character before the math
# dollar sign in $2 and prepend it to the inline math element.
#
# Each match is normalized to the \( ... \) form, stored in `literal_cache`
# under its digest, and replaced in place by an `inline` xmlelement holding
# that digest. Because this uses `gsub!`, the return value is nil when
# `output` contains no inline math.
def cache_inline_math(output)
  output.gsub!(/(?:\\\((.*?)\\\)|([^\\]|^)\$(.*?[^\\])\$)/m) do
    math = "\\( #{$1 || $3} \\)"
    key = digest(math)
    literal_cache[key] = math
    # $2 is nil for the \( ... \) form, hence the to_s.
    $2.to_s + xmlelement('inline') { key }
  end
end

# Converts references to hyperrefs.
# We want to convert
#   Chapter~\ref{cha:foo}
# to
#   \hyperref[cha:foo]{Chapter~\ref{cha:foo}}
# which is then handled by LaTeX's hyperref package
# or by Tralics (where it is converted to a link
# by the postprocessor).
# For completeness, we handle the case where the author neglects to
# use the nonbreak space ~.
#
# The item words come from `language_labels`. The "fig" and "eq" labels are
# abbreviations that must be followed by a literal period. The period is
# written as `\\.` because a bare `\.` inside a double-quoted string loses its
# backslash and would leave an unescaped `.` (any character) in the regex.
#
# `string` is modified in place; because this uses `gsub!`, the return value
# is nil when `string` contains no references.
def hyperrefs(string)
  labels   = language_labels
  chapter  = labels["chapter"]["word"]
  section  = labels["section"]
  table    = labels["table"]
  box      = labels["aside"]
  figure   = labels["figure"]
  fig      = labels["fig"]
  listing  = labels["listing"]
  equation = labels["equation"]
  eq       = labels["eq"]
  linked_item = "(#{chapter}|#{section}|#{table}|#{box}|#{figure}" +
                "|#{fig}\\.|#{listing}|#{equation}|#{eq}\\.)"
  # $1: item word, $2: separator (~ or space), $3: \ref or \eqref, $4: label.
  ref = /(?:#{linked_item}(~| ))*(\\(?:eq)*ref)\{(.*?)\}/i
  string.gsub!(ref) do
    "\\hyperref[#{$4}]{#{$1}#{$2}#{$3}{#{$4}}}"
  end
end

# Handles non-ASCII Unicode characters.
# The Tralics part of the pipeline doesn't properly handle Unicode,
# which is odd since Tralics is a French project.
Nevertheless, # we can hack around the restriction by treating non-ASCII Unicode # characters as literal elements and simply pass them through the # pipeline intact. def cache_unicode(string) non_ascii_unicode = /([^\x00-\x7F]+)/ string.gsub!(non_ascii_unicode) do key = digest($1) literal_cache[key] = $1 xmlelement('unicode') { key } end end def element(literal_type) if math_environments.include?(literal_type) 'equation' else literal_type end end class FileNotFound < Exception; end; class IncludedFileReader def read(filename, _) raise(FileNotFound, filename) unless File.exist?(filename) File.read(filename).split("\n") end end class SectionNotFound < Exception; end; class IncludedSectionReader < IncludedFileReader attr_reader :lines, :sectionname def read(filename, sectionname) @lines = super @sectionname = sectionname raise(SectionNotFound, section_begin_text) unless exist? lines.slice(index_of_first_line, length) end private def exist? !!index_of_section_begin end def index_of_section_begin @section_begin_i ||= lines.index(section_begin_text) end def index_of_first_line @first_line_i ||= index_of_section_begin + 1 end def length lines.slice(index_of_first_line, lines.size).index(section_end_text) end def marker '#//' end def section_begin_text "#{marker} begin #{sectionname}" end def section_end_text "#{marker} end" end end end end class String include Polytexnic::Literal # Returns true if self matches \begin{...} where ... is a literal environment. # Note: Support for the 'metacode' environment exists solely to allow # meta-discussion of the 'code' environment. def begin_literal?(literal_type = nil) return false unless include?('\begin') literal_type ||= "(?:verbatim|Verbatim|code|metacode|" + "#{math_environment_regex})" match(/^\s*\\begin{#{literal_type}}\s*$/) end # Returns true if self matches \end{...} where ... is a literal environment. 
def end_literal?(literal_type)
  # Cheap substring test first; the regex only runs on candidate lines.
  if include?('\end')
    match(/^\s*\\end{#{Regexp.escape(literal_type)}}\s*$/)
  else
    false
  end
end

# Returns the type of literal environment.
# '\begin{verbatim}' => 'verbatim'
# '\begin{equation}' => 'equation'
# '\[' => 'display'
def literal_type
  environment = self[/\\begin{(.*?)}/, 1]
  environment || 'display'
end

# Returns true if self begins a math environment.
def begin_math?
  if include?('\begin')
    math_types = "(?:#{math_environment_regex})"
    match(/^\s*\\begin{#{math_types}}\s*$/)
  else
    false
  end
end

# Returns true if self matches a valid math environment.
# The environment name may appear anywhere in the string.
def math_environment?
  alternatives = math_environment_regex
  match(/(?:#{alternatives})/)
end

private

# Returns a regex matching valid math environments.
# The result is a String of regex-escaped environment names joined by '|',
# ready to be interpolated into a pattern.
def math_environment_regex
  escaped_names = Polytexnic::Literal.math_environments.map do |name|
    Regexp.escape(name)
  end
  escaped_names.join('|')
end
end # class String