# ver: sw=2

require 'nokogiri'
require 'pp'
require_relative 'vendor/better_pp_hash'

# Define some magic constants that aren't available but may be returned from
# Regexp#options so we can make sure that roundtrip via PP works.
#
# 1.9.2 actually defines Regexp::FIXEDENCODING, but there's no release yet, so
# we simply assume the values from the Ruby C source (which means this might not
# work on any other implementation).
# Hopefully nobody tries to convert textmate bundles unless they are on MRI 1.9.1 ;)
class Regexp
  # Option bits reported by Regexp#options for a regexp literal using //n.
  NO_ENCODING = //n.options

  # Guarded so that loading this file a second time does not alias the
  # patched #inspect onto itself (which would recurse forever).
  alias_method :original_inspect, :inspect unless method_defined?(:original_inspect)

  # Take into account //n option.
  #
  # Returns the built-in inspect output, making sure the 'n' flag is present
  # exactly once when the NO_ENCODING bit is set. Interpreters whose built-in
  # inspect already prints the flag are left untouched instead of getting a
  # doubled "nn".
  def inspect
    rendered = original_inspect
    return rendered if (options & NO_ENCODING) == 0

    # Everything after the closing slash is the flag section; slashes inside
    # the pattern are escaped, so the last '/' is always the delimiter.
    flags = rendered[(rendered.rindex('/') + 1)..-1]
    flags.include?('n') ? rendered : rendered << 'n'
  end
end

module VER
  module Plist
    # Converts an XML property list (as used by TextMate bundles) into plain
    # Ruby hashes and arrays. Selected string values are turned into Regexp
    # objects after being repaired by #sanitize_regexp.
    class XML
      # Dict keys whose string values are compiled into Regexp objects.
      REGEXP_KEYS = [:begin, :match, :foldingStartMarker, :foldingStopMarker].freeze

      # xml - String containing the XML plist document.
      def initialize(xml)
        @doc = Nokogiri(xml)
        @plist = {} # not read anywhere in this file; kept for compatibility
        @parsed = nil
        @exceptions = []
      end

      # Returns the parsed Hash, parsing on first use and reusing the cached
      # result afterwards.
      def parse
        @parsed || parse!
      end

      # Parses the document unconditionally and caches the result.
      #
      # Raises ArgumentError when there is no /plist/dict element, and
      # RuntimeError when any regular expression could not be compiled.
      # The cache is only updated after a fully successful parse, so a failed
      # attempt can never be handed out by a later call to #parse.
      def parse!
        dict = @doc.at('/plist/dict')
        raise ArgumentError, 'This is no XML plist' unless dict

        # Start from a clean slate so repeated calls do not accumulate errors.
        @exceptions.clear
        result = handle_dict(dict)
        fail "#{@exceptions.size} errors encountered" if @exceptions.any?

        @parsed = result
      end

      # Converts an <array> node into a Ruby Array.
      #
      # Nodes named 'text' are skipped. A <key> element is not valid inside
      # an array and raises RuntimeError.
      def handle_array(array)
        array.children.each_with_object([]) do |child, out|
          child_name = child.name
          next if child_name == 'text'
          raise 'No key allowed in array' if child_name == 'key'

          out.push(convert_value(child))
        end
      end

      # Converts a <dict> node into a Ruby Hash.
      #
      # Keys consisting only of digits become Integers, all others Symbols.
      # Every value element must be preceded by a <key>, otherwise
      # RuntimeError ('No key given') is raised.
      def handle_dict(dict)
        out = {}
        key = nil

        dict.children.each do |child|
          case child.name
          when 'key'
            text = child.inner_text
            key =
              case text
              when /^\d+$/
                text.to_i
              else
                text.to_sym
              end
          when 'text'
            # ignore
          else
            raise 'No key given' unless key
            out[key] = convert_value(child, key)
            key = nil # each key is consumed by exactly one value
          end
        end

        out
      end

      def to_yaml
        parse.to_yaml
      end

      def to_json
        parse.to_json
      end

      def to_hash
        parse
      end

      private

      # Converts a single plist value node into its Ruby counterpart.
      #
      # node - the value element (array, dict, integer, real, true, false or
      #        string).
      # key  - the dict key the value belongs to, or nil inside arrays. Strings
      #        stored under one of REGEXP_KEYS are compiled via
      #        #sanitize_regexp.
      #
      # <integer> becomes an Integer and <real> a Float, so fractional values
      # are no longer truncated. Raises RuntimeError for any other element.
      def convert_value(node, key = nil)
        node_name = node.name

        case node_name
        when 'array'
          handle_array(node)
        when 'dict'
          handle_dict(node)
        when 'integer'
          node.inner_text.to_i
        when 'real'
          node.inner_text.to_f
        when 'true'
          true
        when 'false'
          false
        when 'string'
          text = node.inner_text
          REGEXP_KEYS.include?(key) ? sanitize_regexp(text) : text
        else
          raise "unhandled %p: %p" % [node_name, node]
        end
      end

      # Ordered table of pattern => replacement pairs that #sanitize_regexp
      # applies, in insertion order, to every regexp source string.
      SANITIZE_REGEXP = {}

      # Registers one substitution. String patterns are matched literally,
      # Regexp patterns are used as they are. The replacement is stored as a
      # binary-encoded copy so the literal passed in is never mutated.
      r = lambda { |string, replacement|
        pattern = string.is_a?(Regexp) ? string : Regexp.new(Regexp.escape(string))
        SANITIZE_REGEXP[pattern] = replacement.dup.force_encoding(Encoding::BINARY)
      }

      # found in newLisp.tmbundle
      r['(?<!(?:\{|\"|]))(?:[\s\n]*)?(;+.*?)$', '(?<!(?:\{|"|\]))(?:\s*)(;+.*)$']

      # found in Movable Type.tmbundle
      r['(<)(\$[mM][tT]:?(?:\w+)?:?\w+)(.*?)(\$)?(>)', '(<)(\$[mM][tT]:?(?:\w*):?\w+)(.*?)(\$)?(>)']
      r['(</?)([mM][tT]:?(?:\w+)?:?\w+)(.*?)>', '(</?)([mM][tT]:?(?:\w*):?\w+)(.*?)>']
      r['\b([a-zA-Z-_:]+)', '\b([a-zA-Z_:-]+)']

      # found in OCaml.tmbundle
      r['(?=(\[<)(?![^\[]+?[^>]]))(.*?)(>])', '(?=(\[<)(?![^\[]+?[^>\]]))(.*?)(>\])']
      r['(\')(.*?)(;)(?=\s*\')|(?=\s*>])', '(\')(.*?)(;)(?=\s*\')|(?=\s*>\])']
      r['(\[)(?!\||<|>)(.*?)(?<!\||>)(])', '(\[)(?!\||<|>)(.*?)(?<!\||>)(\])']
      r['(\[)(\^?)(.*?)(])(?!\\\')', '(\[)(\^?)(.*?)(\])(?!\\\')']
      r['(\[<)(?=.*?>])(.*?)(?=>])', '(\[<)(?=.*?>\])(.*?)(?=>\])']
      r['(\[\|)(.*?)(\|])', '(\[\|)(.*?)(\|\])']
      r['\[<|>]', '\[<|>\]']

      # found in Txt2tags.tmbundle
      r['|[^]]+', '|[^\]]+']

      # found in Perl Template Toolkit.tmbundle
      r['\b([a-zA-Z-_:]+)\s*(=)', '\b([a-zA-Z_:-]+)\s*(=)']

      # found in Property List.tmbundle
      r['<!\[CDATA\[(.*?)]]>', '<!\[CDATA\[(.*?)\]\]>']
      r['(<!\[CDATA\[)(.*?)(]]>)', '(<!\[CDATA\[)(.*?)(\]\]>)']

      # found in SWeave.tmbundle and a few others
      # warning: nested repeat operator ? and * was replaced with '*'
      r[/\.\?\*/, '.*']

      # Found in Textile.tmbundle
      r[/\\\[\[\^\]\]/, '\[[^\]]']

      # found in Twiki.tmbundle
      r['(\[)([^]]*)(\]) *(\[)(.*?)(\])', '(\[)([^\]]*)(\]) *(\[)(.*?)(\])']

      # found in XML.tmbundle
      r['<!\[CDATA\[(.*?)]]>', '<!\[CDATA\[(.*?)\]\]>']

      # '\b\u\w+\u\w+' => '\b\\u\w+\\u\w+'
      r[/\\(u)/i, '\\\\\1']

      # found in MEL.tmbundle/Syntaxes/MEL.plist
      r['(\$)[a-zA-Z_\x{7f}-\x{ff}][a-zA-Z0-9_\x{7f}-\x{ff}]*?\b', '(\$)\w\w*?\b']

      # "[\x{7f}-\x{ff}]" => "[\x7f-\xff]"
      r[/\\x\{(\h+)\}/, '\\\\x\1']

      # remove lots of tabs at line start
      r[/^\t+/, "\t"]

      # Escape #@ #$ #{ because they have special meaning in ruby literal regexps
      r[/([^\\]|$)#([$@{])/, '\1\#\2']

      # '(?=#)' => '(?=\#)'
      r['(?=#)', '(?=\#)']

      # found in Nemerle
      r['(\{|(|<\[)', '(\{|\(|<\[)']
      r['(\}|)|]>)', '(\}|\)|\]>)']

      # found in HTML (Active4D)
      r['\b([a-zA-Z-:]+)','\b([a-zA-Z:-]+)']
      r['\[CDATA\[(.*?)]](?=>)', '\[CDATA\[(.*?)\]\](?=>)']

      # found in Bulletin Board.tmbundle
      r['[\[]]+', '[\[\]]+']

      # found in C.tmbundle
      r['\bdelete\b(\s*\[\])?|\bnew\b(?!])', '\bdelete\b(\s*\[\])?|\bnew\b(?!\])']

      # found in reStructuredText.tmbundle
      r['(\.\.)\s[A-z][A-z0-9-_]+(::)\s*$', '(\.\.)\s[A-z][A-z0-9_-]+(::)\s*$']
      r['^(\.\.)\s+((\[)(((#?)[^]]*?)|\*)(\]))\s+(.*)', '^(\.\.)\s+((\[)(((#?)[^\]]*?)|\*)(\]))\s+(.*)']

      # Fix invalid regular expressions so we can write them out as Ruby code.
      #
      # NOTE:
      #   Yeah, it's weird to fix regular expressions with regular expressions.
      #   But it seems like the people writing syntax files slept all through
      #   regexp class and textmate must be working _very_ hard to allow all
      #   this without giving any warnings (maybe error messages are just not
      #   hip in osx).
      #   Also, there seems to be a lot of copy&pasting between syntax files, as
      #   soon as someone comes up with a horrible way to match things,
      #   everybody else is eager to use it in their files as well.
      # </rant>
      #
      # value - the regexp source String; it is modified in place by the
      #         substitutions from SANITIZE_REGEXP.
      #
      # After the substitutions every plain capture group "(" is rewritten to
      # a named group "(?<_N>" and every numeric backreference "\N" to
      # "\k<_N>"; groups starting with "(?" are copied through unchanged.
      #
      # Returns the compiled Regexp. When compilation fails the problem is
      # recorded through #error and that method's return value is returned
      # instead; #parse! then raises once the whole document was processed.
      def sanitize_regexp(value)
        original = value.dup

        SANITIZE_REGEXP.each do |pattern, replacement|
          value.gsub!(pattern, replacement)
        end

        value2 = ''.dup # mutable buffer for the rewritten source
        group_index = 0

        value.scan(/((?:[^\\(]+|\\[^\d])+)|(\\\d+)|(\(\??)/m) do |content, backref, capture|
          if capture == '('
            value2 << "(?<_#{group_index += 1}>"
          elsif backref
            value2 << "\\k<_#{backref[/\d+/]}>"
          else
            value2 << (content || capture)
          end
        end

        Regexp.new(value2)
      rescue RegexpError => ex
        if ex.message =~ /^invalid multibyte escape:/
          # Retry as a binary //n regexp before giving up.
          begin
            /#{value2.force_encoding(Encoding::BINARY)}/n
          rescue RegexpError => ex
            error(ex, original, value2)
          end
        else
          error(ex, original, value2)
        end
      end

      # Prints a report about a regexp that could not be compiled and records
      # the exception so #parse! can fail after the whole document was seen.
      #
      # Returns the list of exceptions collected so far.
      def error(exception, original, modified)
        puts ' [ exception ] '.center(80, '-')
        puts exception, *exception.backtrace
        puts ' [ original regexp ] '.center(80, '-')
        p original
        puts original
        puts ' [ modified regexp ] '.center(80, '-')
        p modified
        puts modified
        puts '-' * 80

        @exceptions << exception
      end
    end

    module_function

    # Reads the XML plist stored at filename and returns the parsed Hash.
    def parse_xml(filename)
      XML.new(File.read(filename)).parse
    end
  end
end