# encoding: utf-8 require 'cgi' require 'uri' require 'yaml' require 'base64' require 'digest/sha2' require 'unicode_utils/compatibility_decomposition' # :main: TracWiki # The TracWiki parses and translates Trac formatted text into # XHTML. TracWiki is a lightweight markup syntax similar to what many # WikiWikiWebs use. Example syntax: # # = Heading 1 = # == Heading 2 == # === Heading 3 === # **Bold text** # ''Italic text'' # [[Links]] # ||=Table=||=Heading=|| # || Table || Cells || # [[Image(image.png)]] # [[Image(image.png, options)]] # # for more see http://trac.edgewall.org/wiki/WikiFormatting # # You can customize the created image markup by overriding # make_image. # Main TracWiki parser class. Call TracWikiParser#parse to parse # TracWiki formatted text. # # This class is not reentrant. A separate instance is needed for # each thread that needs to convert Creole to HTML. # # Inherit this to provide custom handling of links. The overrideable # methods are: make_local_link module TracWiki class TooLongException < Exception end class Parser # Allowed url schemes # Examples: http https ftp ftps attr_accessor :allowed_schemes # structure where headings are stroed # list of hasheses with `level` and `title`, `sline` # [ { leven: 1, #

# sline: 3, # line where head starts # eline: 4, # line before next heading starts # aname: 'anchor-to-this-heading', # title: 'heading title' # }, # ... # ] attr_accessor :headings attr_accessor :env attr_accessor :base_url attr_reader :used_templates @used_templates = {} # url base for links attr_writer :base # url base for /links attr_writer :root # Disable url escaping for local links # Escaping: [[/Test]] --> %2FTest # No escaping: [[/Test]] --> Test attr_writer :no_escape def no_escape?; @no_escape; end # Disable url escaping for local links # [[whatwerver]] stays [[whatwerver]] attr_writer :no_link def no_link?; @no_link; end # Enable
around attr_writer :div_around_table def div_around_table?; @div_around_table; end # math syntax extension: # $e^x$ for inline math # $$ e^x $$ for display math attr_writer :math def math?; @math; end # allow some
# html will be sanitized attr_writer :allow_html def allow_html?; @allow_html; end # add 'mine attr_writer :merge def merge?; @merge; end # every heading had id, generated from heading text attr_writer :id_from_heading def id_from_heading?; @id_from_heading; end # use macros? defalut yes attr_writer :macros def macros?; @macros; end # when id_from_heading, non ascii char are transliterated to ascii attr_writer :id_translit def id_translit?; @id_translit; end # like template but more powerfull # do no use. attr_accessor :macro_commands @macro_commands = {} # template_handler(macroname) -> template_text # when macros enabled and {{myCoolMacro}} ocured, # result fo `template_handler('myCoolMacro') inserted attr_accessor :template_handler # Proc or nil # at_callback.call(env, key) -> computed value attr_accessor :at_callback # macro {{$var}} | {{#comment}} | {{!cmd}} | {{template}} | {{/template}} # string begins with macro MACRO_BEG_REX = /\A\{\{ ( \$[#\$\.\w]+ | [\#!\/]\w* |\w+ ) /x MACRO_BEG_INSIDE_REX = /\A(.*?) (? proc { |env| what = env.expand_arg(0) count = env.arg_count i = 1 ret = '' while count > i argi = env.expand_arg(i) # this is else: if count <= i + 1 ret = argi break end # we found it if what == argi ret = env.expand_arg(i+1) break end # next i +=2 end ret.sub /\n\s+\Z/, '' }, '!ifdef' => proc { |env| env.at(env.expand_arg(0), nil, false).nil? ? env.expand_arg(2) : env.expand_arg(1) }, '!set' => proc { |env| env[env.expand_arg(0)] = env.expand_arg(1); '' }, '!append'=> proc { |env| key = env.expand_arg(0) sep = env.expand_arg(2,'') env[key] = (env[key].nil? ? '' : env[key] + sep ) + env.expand_arg(1); '' }, '!yset' => proc { |env| env[env.expand_arg(0)] = YAML.load(env.arg(1)); '' }, '!sub' => proc { |env| pat = env.expand_arg(1) pat = Regexp.new(pat[1..-2]) if pat =~ /\A\/.*\/\Z/ env.expand_arg(0).gsub(pat, env.expand_arg(2)) }, '!macpos'=> proc { |env| "#{env.at('lineno')}.#{env.at('offset')}-#{env.at('elineno')}.#{env.at('eoffset')}" }, # in macro {{arg {{$i}}} -> i=4 => ctyry '!arg' => proc { |env| #print "arg: #{env.arg(0)}, #{env} }" "#{env.at(env.expand(env.arg(0)))}" }, # {{!forargs i|3|i:{$$i}}}} -> 0,1,2 # {{!forargs i||i:{$$i}}}} -> 0,1,2 '!sprintf' => proc { |env| fmt = env.arg(0) args = (1 .. env.arg_count ).map{ |i| env.expand_arg(i)} begin sprintf fmt, *args rescue Exception => e "(sprintf error:`#{e}`)" end }, '!digest' => proc { |env| Base64.urlsafe_encode64(Digest::SHA256.digest(env.expand_arg(0))) }, '!base64' => proc { |env| Base64.urlsafe_encode64(env.expand_arg(0)) }, '!tt' => proc { |env| "`#{env.expand_arg(0)}`" }, '!forargs' => proc { |env| i_name = env.arg(0) i_name = env.arg(0) start = env.arg(1).to_i # side efect '' => 0 start = 1 if start < 1 template = env.arg(2) argcount = (start .. env.arg_count ).map do |i| env.atput(i_name, i.to_s) env.expand(template) end.join('') }, # {{!for i|1,3|i:{$$i}}}} -> 0,1,2 # {{!for i|1,3,data|i:{$$i}}}} -> data[1,2,3] # {{!for i|data|i:{$$i}}}} -> vsechny data # {{!for i|1|i:{$$i}}}} -> vsechny data '!for' => proc { |env| i_name = env.arg(0) raise "!for takes exactly 3 argumentsif not #{env.arg_count}" if env.arg_count != 3 i_name = env.arg(0) range = env.expand_arg(1) templ = env.arg(2) arange = range.split(/,/) arange = [ nil, nil, arange[0]] if arange.size == 1 bot = arange[0] top = arange[1] var = arange[2] if var.nil? set = (bot.to_i||1 .. top.to_i) else obj = env.at(var, nil, false) if obj.is_a?(Hash) set = obj.keys.sort elsif obj.is_a?(Array) #print "ble:#{bot} #{bot}, #{obj.size}\n" set = (bot||0 .. obj.size-1) elsif obj.nil? set = [] else raise "Error: wrong arg #{obj} class='#{obj.class}'" end end #set.select! { |x| x >= bot } if bot #set.select! { |x| x <= top } if top #print "for set#{set} tmpl#{templ||"nic"}\n" set.map do |i| #print "for#{i} #{templ}\n" env[i_name] = i.to_s env.atput(i_name, i.to_s) env.expand(templ) end.join('') }, } end # Escape any characters with special meaning in HTML using HTML # entities. (&<>" not ') def escape_html(string) string.nil? ? '' : Parser.escapeHTML(string) end def self.escapeHTML(string) string.gsub(/&/, '&').gsub(/\"/, '"').gsub(/>/, '>').gsub(/ "LocalLink" # make_local_link("/Foo/Bar") #=> "%2FFoo%2FBar" # # Must ensure that the result is properly URL-escaped. The caller # will handle HTML escaping as necessary. HTML links will not be # inserted if the function returns nil. # # Example custom behaviour: # # make_local_link("LocalLink") #=> "/LocalLink" # make_local_link("Wikipedia:Bread") #=> "http://en.wikipedia.org/wiki/Bread" def make_local_link(link) #:doc: # FIXME: xss when no_escape link, anch = link.split(/#/, 2) if no_escape? prefix = @base if link =~ /^\/(.*)/ link = $1 prefix = @root end return "#{prefix}#{link}" if ! anch return "##{anch}" if link == '' return "#{prefix}#{link}##{anch}" end return "#{@base}#{escape_url(link)}" if ! anch return "##{escape_url(anch)}" if link == '' "#{@base}#{escape_url(link)}##{escape_url(anch)}" end # Create image markup. This # method can be overridden to generate custom # markup, for example to add html additional attributes or # to put divs around the imgs. def make_image(uri, attrs='') attrs = make_image_attrs(uri, attrs) link = attrs.delete('link') @tree.tag_beg(:a, {href: make_explicit_link(link)}) if link @tree.tag(:img, attrs) @tree.tag_end(:a) if link end def make_image_attrs(uri, attrs) a = {src: make_explicit_link(uri)} style = [] attrs ||= '' attrs.strip.split(/\s*,\s*/).each do |opt| case opt when /^\d+[^\d]*$/ a['width'] = opt when /^(right|left|center)/i a['align'] = opt when /^(top|bottom|middle)$/i a['valign'] = opt when /^link=(.*)$/i a['link'] = $1 when /^nolink$/i # pass when /^(align|valign|border|width|height|alt|title|longdesc|class|id|usemap)=(.*)$/i a[$1]= $2 when /^(margin|margin-(left|right|top|bottom))=(\d+)$/ style.push($1 + ':' + $3) end end a[:style] = style.join(';') if ! style.empty? return {} if a.empty? return a; end def make_headline(level, title, aname, title_offset) hN = "h#{level}".to_sym @tree.tag_beg(hN, { id: aname } ) parse_inline(title, title_offset) if edit_heading? edit_heading_link(@headings.size - 1) end @tree.tag_end(hN) end def edit_heading_link(section) @tree.tag(:a, { class: @edit_heading_class, id:"h#{section}", href: "#{@base_url}?edit=#{section}"}, "edit") end def make_explicit_link(link) begin uri = URI.parse(link) return uri.to_s if uri.scheme && @allowed_schemes.include?(uri.scheme) rescue URI::InvalidURIError end make_local_link(link) end def make_toc @headings.map do |h| if h[:level] < 1 '' else ind = " " * (h[:level] - 1) "#{ind}* [[##{h[:aname]}|#{h[:title]}]]\n" end end.join end def parse_inline(str, offset) raise "offset is nil" if offset.nil? until str.empty? case # raw url http://example.com/ when str =~ /\A(!)?((https?|ftps?):\/\/\S+?)(?=([\]\,.?!:;"'\)]+)?(\s|$))/ notlink, link = $1, $2 make_link(link, nil, link, 0, !!notlink) # [[Image(pic.jpg|tag)]] when str =~ /\A\[\[Image\(([^,]*?)(,(.*?))?\)\]\]/ # image make_image($1, $3) # [[link]] # [ link2 | text5 ] when str =~ /\A (\[ \s* ([^\[|]*?) \s*) ((\|\s*)(.*?))? \s* \] /mx link, content, content_offset, whole = $2, $5, $1.size + ($4||'').size, $& make_link(link, content, "[#{whole}]",offset + content_offset) # [[ link2 | text5 ]] when str =~ /\A (\[\[ \s* ([^|]*?) \s*) ((\|\s*)(.*?))? \s* \]\] /mx link, content, content_offset, whole= $2, $5, $1.size + ($4||'').size, $& #print "link: #{content_offset} of:#{offset}, '#{$1}', '#{$4||''}'\n" make_link(link, content, whole, offset + content_offset) when allow_html? && str =~ /\A<(\/)?(\w+)(?:([^>]*?))?(\/\s*)?>/ # single inline tag eot, tag, args, closed = $1, $2, $3, $4 do_raw_tag(eot, tag, args, closed, $'.size) when str =~ /\A\{\{\{(.*?\}*)\}\}\}/ # inline {{{ }}} pre (tt) @tree.tag(:tt, $1) when macros? && str =~ MACRO_BEG_REX # macro {{ mac, str, lines, offset = parse_macro($1, $', offset, $&.size) parse_inline(mac.gsub(/\n/, ' '),0); #print "MACRO.inline(#{$1}), next:#{str}" #return str, offset next when str =~ /\A`(.*?)`/ # inline pre (tt) @tree.tag(:tt, $1) when math? && str =~ /\A\$(.+?)\$/ # inline math (tt) @tree.tag(:span, {:class => 'math'}, $1) @was_math = true when str =~ /\A(\&\w*;)/ # html entity #print "add html ent: #{$1}\n" @tree.add_raw($1) when str =~ /\A([:alpha:]|[:digit:])+/ @tree.add($&) # word when str =~ /\A\s+/ @tree.add_spc when str =~ /\A'''''/ toggle_tag 'strongem', $& # bolditallic when str =~ /\A\*\*/ || str =~ /\A'''/ toggle_tag 'strong', $& # bold when str =~ /\A''/ || str =~ /\A\/\// toggle_tag 'em', $& # italic when str =~ /\A\\\\/ || str =~ /\A\[\[br\]\]/i @tree.tag(:br) # newline when str =~ /\A__/ toggle_tag 'u', $& # underline when str =~ /\A~~/ toggle_tag 'del', $& # delete when str =~ /\A~/ @tree.add_raw(' ') # tilde # when /\A\+\+/ # toggle_tag 'ins', $& # insert when str =~ /\A\^/ toggle_tag 'sup', $& # ^{} when str =~ /\A,,/ toggle_tag 'sub', $& # _{} when str =~ /\A!\./ @tree.add('') # !. \relax when str =~ /\A!(\{\{|[\S])/ @tree.add($1) # !neco !{{ # when str =~ /\A!(\{\{)/ # @tree.add($1) # !neco !{ when str =~ /\A./ @tree.add($&) # ordinal char end str = $' offset += $&.size end return offset end ################################################################# # macro {{ }} # convetntion {{!cmd}} {{template}} {{$var}} {{# comment}} {{!}} (pipe) # r: expanded macro , rest of str, count lines taken from str # sideefect: parse result of macro def parse_macro(macro_name, str, offset, macro_name_size) raise "offset is nil" if offset.nil? raise "offset is nil" if macro_name_size.nil? @env.atput('offset', offset) @env.atput('lineno', @line_no) begin mac_out, rest, lines, rest_offset = @env.parse_macro_all(macro_name, str, macro_name_size) raise "lines is nil" if lines.nil? #print "mac: '#{mac_out}' rest: '#{rest}'\n" #print "mac: ro #{rest_offset}, of#{offset}, lines: #{lines} ms: #{macro_name_size} strlen#{str.size}, str'#{str}' rest:'#{rest}'\n" rest_offset += offset + macro_name_size if lines == 0 #print "ro#{rest_offset}\n" return mac_out, rest, lines, rest_offset rescue TooLongException => e return '', "TOO_LONG_EXPANSION_OF_MACRO(#{macro_name})QUIT", 0, 0 rescue Exception => e #@tree.tag(:span, {:title => "#{e}\", :class=>'parse-error'}, "!!!") @tree.tag(:span, {:title => "#{e}\n#{e.backtrace}", :class=>'parse-error'}, "!!!") print "tace#{e.backtrace.to_s}\n" return '', '', 0, 0 end end ################################################################# def make_link(link, content, whole, offset, not_make_link = false ) # was '!' before url? return @tree.add(whole) if not_make_link # specail "link" [[BR]]: return @tree.tag(:br) if link =~ /^br$/i uri = make_explicit_link(link) return @tree.add(whole) if not uri return @tree.add(whole) if no_link? && uri !~ /^(ftp|https?):/ @tree.tag_beg(:a, {href:uri}) if content parse_inline(content, offset) else @tree.add(link) end @tree.tag_end(:a) end ################################################################# def parse_table_row(str) offset = 0; start_tag('tr') if !@stack.include?('tr') str.sub!(/\r/, '') colspan = 1 print_tr = true last_tail = '' last_txt = '' str.scan(/(=?)(\s*)(.*?)\1?($ | \|\|\\\s*$ | \|\| )/x) do tdth = $1.empty? ? 'td' : 'th' tdth_size = $1.size le, txt, tail, cell_size = $2.size, $3, $4, $&.size # do not end row, continue on next line print_tr = false if tail =~ /^\|\|\\/ if txt.empty? && le == 0 colspan += 1 next end style = nil if txt =~ /\S(\s*)$/ ri = $1.size ri += 100 if tail.empty? # do not right when last || omnited style = 'text-align:right' if ri == 0 && le >= 1 style = 'text-align:center' if le >= 2 && ri >= 2 #print "le#{le} ri#{ri} st:#{style}\n" end colspan = colspan > 1 ? colspan : nil; start_tag(tdth, { style:style, colspan: colspan}); colspan = 1 parse_inline(txt.strip, offset + tdth_size + le + 2) if txt end_tag while @stack.last != 'tr' offset += cell_size end if print_tr end_tag end return offset end def make_nowikiblock(input) input.gsub(/^ (?=\}\}\})/, '') end def parse_li_line(spc_size, bullet) while !@stacki.empty? && @stacki.last > spc_size end_tag end if @stack.include?('li') while @stack.last != 'li' end_tag end # end list if type differ # @stack.last is now ul or li if @stacki.last == spc_size end_tag # li ulol_last = @stack.last ulol_now = bullet =~ /[*-]/ ? 'ul' : 'ol' if ulol_last != ulol_now end_tag # ol | ul end end else end_paragraph end if @stacki.empty? || @stacki.last < spc_size bullet.gsub!(/\.$/,'') ulol = bullet =~ /[-*]/ ? 'ul' : 'ol'; type = nil type = 'i' if bullet =~ /i/i; type = 'a' if bullet =~ /a/i; start = nil start = bullet if bullet =~ /^\d+$/ && bullet != '1' start_tag(ulol, {type: type, start: start}, spc_size) end start_tag('li') end def blockquote_level_to(level) cur_level = @stack.count('blockquote') if cur_level == level @tree.add(' ') return end while cur_level < level cur_level += 1 start_tag('blockquote') end while cur_level > level cur_level -= 1 if @stack.last == 'blockquote' end_tag end end def do_math(text) end_paragraph #@tree.add("$$#{text}$$\n") @tree.tag(:div, {class:'math'}, text) @was_math = true end def do_merge(merge_type, who) merge_class = case merge_type[0] when '<' ; 'merge-mine' when '=' ; 'merge-split' when '|' ; 'merge-orig' when '>' ; 'merge-your' end end_paragraph @tree.tag(:div, { class: "merge #{merge_class}" }, who) end def do_wikimedia_table(text) end_paragraph if div_around_table? start_tag('div', class: 'table-div', 'data-from-line' => @line_no, 'data-to-line' => @line_no + text.count("\n") + 2) end start_tag(:table) start_tag(:tr) offset = 0 text.split("\n").each do |line| offset += line.length + 1 if line == '|-' end_to_tag :tr start_tag(:tr) elsif line =~ /^([!\|])(.*)/ end_to_tag :td end_to_tag :th fst, rest = $1, $2 start_tag($1 == '|' ? :td : :th) parse_inline(rest.strip, offset) else parse_inline(' ' +line.strip, offset) end end end_to_tag :table end def do_pre(text) end_paragraph nowikiblock = make_nowikiblock(text) @tree.tag(:pre, nowikiblock) end def do_raw_tag(eot, tag, attrs, closed, tail_size) if !eot end_paragraph if tag == 'p' || tag == 'div' #print "open tag #{tag},'#{attrs}'\n" attrs_h = _parse_attrs_to_hash(attrs) @tree.tag_beg(tag, attrs_h) @tree.tag_end(tag) if closed else #print "close tag #{tag}\n" @tree.tag_end(tag) if tag == 'p' || tag == 'div' end_paragraph start_paragraph if tail_size > 0 end end end def _parse_attrs_to_hash(str) ret = {} @env.atput('lineno', @line_no); while str =~ /\A\s*([-\w]+)\s*=\s*'([^>']*)'/ || str =~ /\A\s*([-\w]+)\s*=\s*"([^>"]*)"/ || str =~ /\A\s*([-\w]+)\s*=\s*(\S*)/ ret[$1] = @env.expand($2) str = $' end ret end def do_hr end_paragraph @tree.tag(:hr) end def do_heading(level, title, aname, title_offset) aname= aname_nice(aname, title) @headings.last[:eline] = @line_no - 1 @headings.push({ :title => title, :sline => @line_no, :aname => aname, :level => level, }) end_paragraph make_headline(level, title, aname, title_offset) end def do_table_row(text) if !@stack.include?('table') end_paragraph @table_node = start_tag('div', class: 'table-div', 'data-from-line' => @line_no) if div_around_table? start_tag('table') end @table_node.attrs['data-to-line'] = @line_no if @table_node parse_table_row(text) end def do_term(term) start_tag('dl') start_tag('dt') @tree.add(term) end_tag start_tag('dd') end def do_citation(level) start_paragraph if !@stack.include? 'p' blockquote_level_to(level) end def do_ord_line(spc_size) if @stack.include?('li') || @stack.include?('dl') # dl, li continuation parse_inline(' ', 0) elsif spc_size > 0 # quote continuation start_paragraph if !@stack.include? 'p' blockquote_level_to(1) else # real ordinary line start_paragraph end end def parse_block(str, want_end_paragraph = true) #print "BLOCK.str(#{str})\n" until str.empty? case # macro when macros? && str =~ MACRO_BEG_REX mac, str, lines, offset = parse_macro($1, $', 0, $&.size) raise 'lines is nil' if lines.nil? raise 'offset is nil' if offset.nil? #print "MACRO.lines(#{$1})lines:#{lines}, str:'#{str}'\n" add_line_no(lines) @count_lines_level +=1 parse_block(mac, false) @count_lines_level -=1 if mac.size > 0 && str =~ /^(.*)(\r?\n)?/ line, str = $1 , $' add_line_no($&.count("\n")) parse_inline(line, offset) end next # display math $$ when math? && str =~ /\A\$\$(.*?)\$\$/m do_math($1) # merge when merge? && str =~ /\A(<{7}|={7}|>{7}|\|{7}) *(\S*).*$(\r?\n)?/ do_merge($1, $2) # pre {{{ ... }}} when str =~ /\A\{\{\{\r?\n(.*?)\r?\n\}\}\}/m do_pre($1) # wikimedia table {| ... |} when str =~ /\A\{\|\r?\n(.*?)\r?\n\|\}/m do_wikimedia_table($1) # horizontal rule when str =~ /\A\s*-{4,}\s*$/ do_hr() # heading == Wiki Ruless == # heading == Wiki Ruless == #tag when str =~ /\A([[:blank:]]*(={1,6})\s*)(.*?)\s*=*\s*(#(\S*))?\s*$(\r?\n)?/ do_heading($2.size, $3, $5, $1.size) # table row || when str =~ /\A[ \t]*\|\|(.*)$(\r?\n)?/ do_table_row($1) # empty line when str =~ /\A\s*$(\r?\n)?/ end_paragraph #when str =~ /\A([:\w\s]+)::(\s+|\r?\n)/ when str =~ /\A(.+)::(\s+|\r?\n)/ do_term($1) # li * when str =~ /\A((\s*)([*-]|[aAIi\d]\.)\s+)(.*?)$(\r?\n)?/ parse_li_line($2.size, $3) parse_inline($4, $1.size) # citation > when str =~ /\A(>[>\s]*)(.*?)$(\r?\n)?/ do_citation($1.count('>')) parse_inline($2, $1.size) # ordinary line when str =~ /\A(\s*)(\S+.*?)$(\r?\n)?/ text = $2 do_ord_line($1.size) parse_inline(text.rstrip, $1.size) else # case str raise "Parse error at #{str[0,30].inspect}" end add_line_no(($`).count("\n")+($&).count("\n")) str = $' end end_paragraph if want_end_paragraph @headings.last[:eline] = @line_no - 1 end def aname_nice(aname, title) if aname.nil? && id_from_heading? aname = title.gsub /\s+/, '_' aname = _translit(aname) if id_translit? end return nil if aname.nil? aname_ori = aname count = 2 while @anames[aname] aname = aname_ori + ".#{count}" count+=1 end @anames[aname] = true aname end def _translit(text) # iconv is obsolete, but translit funcionality was not replaced # see http://stackoverflow.com/questions/20224915/iconv-will-be-deprecated-in-the-future-transliterate # return Iconv.iconv('ascii//translit', 'utf-8', text).join # http://unicode-utils.rubyforge.org/UnicodeUtils.html#method-c-compatibility_decomposition return UnicodeUtils.compatibility_decomposition(text).chars.grep(/\p{^Mn}/).join('') end end end