# encoding: UTF-8 # # Copyright 2011, 2012 Keith Rarick # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # See https://github.com/kr/okjson for updates. require 'stringio' # Some parts adapted from # http://golang.org/src/pkg/json/decode.go and # http://golang.org/src/pkg/utf8/utf8.go module Raven module OkJson Upstream = '43' extend self # Decodes a json document in string s and # returns the corresponding ruby value. # String s must be valid UTF-8. If you have # a string in some other encoding, convert # it first. # # String values in the resulting structure # will be UTF-8. def decode(s) ts = lex(s) v, ts = textparse(ts) if ts.length > 0 raise Error, 'trailing garbage' end v end # Encodes x into a json text. It may contain only # Array, Hash, String, Numeric, true, false, nil. # (Note, this list excludes Symbol.) # X itself must be an Array or a Hash. # No other value can be encoded, and an error will # be raised if x contains any other value, such as # Nan, Infinity, Symbol, and Proc, or if a Hash key # is not a String. # Strings contained in x must be valid UTF-8. def encode(x) case x when Hash then objenc(x) when Array then arrenc(x) else raise Error, 'root value must be an Array or a Hash' end end def valenc(x) case x when Hash then objenc(x) when Array then arrenc(x) when String then strenc(x) when Symbol then strenc(x.to_s) when Numeric then numenc(x) when true then "true" when false then "false" when nil then "null" else strenc((x.inspect rescue $!.to_s)) end end private # Parses a "json text" in the sense of RFC 4627. # Returns the parsed value and any trailing tokens. # Note: this is almost the same as valparse, # except that it does not accept atomic values. def textparse(ts) if ts.length <= 0 raise Error, 'empty' end typ, _, val = ts[0] case typ when '{' then objparse(ts) when '[' then arrparse(ts) else raise Error, "unexpected #{val.inspect}" end end # Parses a "value" in the sense of RFC 4627. # Returns the parsed value and any trailing tokens. def valparse(ts) if ts.length <= 0 raise Error, 'empty' end typ, _, val = ts[0] case typ when '{' then objparse(ts) when '[' then arrparse(ts) when :val,:str then [val, ts[1..-1]] else raise Error, "unexpected #{val.inspect}" end end # Parses an "object" in the sense of RFC 4627. # Returns the parsed value and any trailing tokens. def objparse(ts) ts = eat('{', ts) obj = {} unless ts[0] raise Error, "unexpected end of object" end if ts[0][0] == '}' return obj, ts[1..-1] end k, v, ts = pairparse(ts) obj[k] = v if ts[0][0] == '}' return obj, ts[1..-1] end loop do ts = eat(',', ts) k, v, ts = pairparse(ts) obj[k] = v if ts[0][0] == '}' return obj, ts[1..-1] end end end # Parses a "member" in the sense of RFC 4627. # Returns the parsed values and any trailing tokens. def pairparse(ts) (typ, _, k), ts = ts[0], ts[1..-1] if typ != :str raise Error, "unexpected #{k.inspect}" end ts = eat(':', ts) v, ts = valparse(ts) [k, v, ts] end # Parses an "array" in the sense of RFC 4627. # Returns the parsed value and any trailing tokens. def arrparse(ts) ts = eat('[', ts) arr = [] unless ts[0] raise Error, "unexpected end of array" end if ts[0][0] == ']' return arr, ts[1..-1] end v, ts = valparse(ts) arr << v if ts[0][0] == ']' return arr, ts[1..-1] end loop do ts = eat(',', ts) v, ts = valparse(ts) arr << v if ts[0][0] == ']' return arr, ts[1..-1] end end end def eat(typ, ts) if ts[0][0] != typ raise Error, "expected #{typ} (got #{ts[0].inspect})" end ts[1..-1] end # Scans s and returns a list of json tokens, # excluding white space (as defined in RFC 4627). def lex(s) ts = [] while s.length > 0 typ, lexeme, val = tok(s) if typ == nil raise Error, "invalid character at #{s[0,10].inspect}" end if typ != :space ts << [typ, lexeme, val] end s = s[lexeme.length..-1] end ts end # Scans the first token in s and # returns a 3-element list, or nil # if s does not begin with a valid token. # # The first list element is one of # '{', '}', ':', ',', '[', ']', # :val, :str, and :space. # # The second element is the lexeme. # # The third element is the value of the # token for :val and :str, otherwise # it is the lexeme. def tok(s) case s[0] when ?{ then ['{', s[0,1], s[0,1]] when ?} then ['}', s[0,1], s[0,1]] when ?: then [':', s[0,1], s[0,1]] when ?, then [',', s[0,1], s[0,1]] when ?[ then ['[', s[0,1], s[0,1]] when ?] then [']', s[0,1], s[0,1]] when ?n then nulltok(s) when ?t then truetok(s) when ?f then falsetok(s) when ?" then strtok(s) when Spc, ?\t, ?\n, ?\r then [:space, s[0,1], s[0,1]] else numtok(s) end end def nulltok(s); s[0,4] == 'null' ? [:val, 'null', nil] : [] end def truetok(s); s[0,4] == 'true' ? [:val, 'true', true] : [] end def falsetok(s); s[0,5] == 'false' ? [:val, 'false', false] : [] end def numtok(s) m = /-?([1-9][0-9]+|[0-9])([.][0-9]+)?([eE][+-]?[0-9]+)?/.match(s) if m && m.begin(0) == 0 if !m[2] && !m[3] [:val, m[0], Integer(m[0])] elsif m[2] [:val, m[0], Float(m[0])] else # We don't convert scientific notation [:val, m[0], m[0]] end else [] end end def strtok(s) m = /"([^"\\]|\\["\/\\bfnrt]|\\u[0-9a-fA-F]{4})*"/.match(s) if ! m raise Error, "invalid string literal at #{abbrev(s)}" end [:str, m[0], unquote(m[0])] end def abbrev(s) t = s[0,10] p = t['`'] t = t[0,p] if p t = t + '...' if t.length < s.length '`' + t + '`' end # Converts a quoted json string literal q into a UTF-8-encoded string. # The rules are different than for Ruby, so we cannot use eval. # Unquote will raise an error if q contains control characters. def unquote(q) q = q[1...-1] a = q.dup # allocate a big enough string # In ruby >= 1.9, a[w] is a codepoint, not a byte. if rubydoesenc? a.force_encoding('UTF-8') end r, w = 0, 0 while r < q.length c = q[r] if c == ?\\ r += 1 if r >= q.length raise Error, "string literal ends with a \"\\\": \"#{q}\"" end case q[r] when ?",?\\,?/,?' a[w] = q[r] r += 1 w += 1 when ?b,?f,?n,?r,?t a[w] = Unesc[q[r]] r += 1 w += 1 when ?u r += 1 uchar = begin hexdec4(q[r,4]) rescue RuntimeError => e raise Error, "invalid escape sequence \\u#{q[r,4]}: #{e}" end r += 4 if surrogate? uchar if q.length >= r+6 uchar1 = hexdec4(q[r+2,4]) uchar = subst(uchar, uchar1) if uchar != Ucharerr # A valid pair; consume. r += 6 end end end if rubydoesenc? a[w] = '' << uchar w += 1 else w += ucharenc(a, w, uchar) end else raise Error, "invalid escape char #{q[r]} in \"#{q}\"" end elsif c == ?" || c < Spc raise Error, "invalid character in string literal \"#{q}\"" else # Copy anything else byte-for-byte. # Valid UTF-8 will remain valid UTF-8. # Invalid UTF-8 will remain invalid UTF-8. # In ruby >= 1.9, c is a codepoint, not a byte, # in which case this is still what we want. a[w] = c r += 1 w += 1 end end a[0,w] end # Encodes unicode character u as UTF-8 # bytes in string a at position i. # Returns the number of bytes written. def ucharenc(a, i, u) if u <= Uchar1max a[i] = (u & 0xff).chr 1 elsif u <= Uchar2max a[i+0] = (Utag2 | ((u>>6)&0xff)).chr a[i+1] = (Utagx | (u&Umaskx)).chr 2 elsif u <= Uchar3max a[i+0] = (Utag3 | ((u>>12)&0xff)).chr a[i+1] = (Utagx | ((u>>6)&Umaskx)).chr a[i+2] = (Utagx | (u&Umaskx)).chr 3 else a[i+0] = (Utag4 | ((u>>18)&0xff)).chr a[i+1] = (Utagx | ((u>>12)&Umaskx)).chr a[i+2] = (Utagx | ((u>>6)&Umaskx)).chr a[i+3] = (Utagx | (u&Umaskx)).chr 4 end end def hexdec4(s) if s.length != 4 raise Error, 'short' end (nibble(s[0])<<12) | (nibble(s[1])<<8) | (nibble(s[2])<<4) | nibble(s[3]) end def subst(u1, u2) if Usurr1 <= u1 && u1 < Usurr2 && Usurr2 <= u2 && u2 < Usurr3 return ((u1-Usurr1)<<10) | (u2-Usurr2) + Usurrself end return Ucharerr end def surrogate?(u) Usurr1 <= u && u < Usurr3 end def nibble(c) if ?0 <= c && c <= ?9 then c.ord - ?0.ord elsif ?a <= c && c <= ?z then c.ord - ?a.ord + 10 elsif ?A <= c && c <= ?Z then c.ord - ?A.ord + 10 else raise Error, "invalid hex code #{c}" end end def objenc(x) '{' + x.map{|k,v| keyenc(k) + ':' + valenc(v)}.join(',') + '}' end def arrenc(a) '[' + a.map{|x| valenc(x)}.join(',') + ']' end def keyenc(k) case k when String then strenc(k) when Symbol then strenc(k.to_s) else raise Error, "Hash key is not a string: #{k.inspect}" end end def strenc(s) t = StringIO.new t.putc(?") r = 0 while r < s.length case s[r] when ?" then t.print('\\"') when ?\\ then t.print('\\\\') when ?\b then t.print('\\b') when ?\f then t.print('\\f') when ?\n then t.print('\\n') when ?\r then t.print('\\r') when ?\t then t.print('\\t') else c = s[r] # In ruby >= 1.9, s[r] is a codepoint, not a byte. if rubydoesenc? begin # c.ord will raise an error if c is invalid UTF-8 if c.ord < Spc.ord c = "\\u%04x" % [c.ord] end t.write(c) rescue t.write(Ustrerr) end elsif c < Spc t.write("\\u%04x" % c) elsif Spc <= c && c <= ?~ t.putc(c) else n = ucharcopy(t, s, r) # ensure valid UTF-8 output r += n - 1 # r is incremented below end end r += 1 end t.putc(?") t.string end def numenc(x) if ((x.nan? || x.infinite?) rescue false) raise Error, "Numeric cannot be represented: #{x}" end "#{x}" end # Copies the valid UTF-8 bytes of a single character # from string s at position i to I/O object t, and # returns the number of bytes copied. # If no valid UTF-8 char exists at position i, # ucharcopy writes Ustrerr and returns 1. def ucharcopy(t, s, i) n = s.length - i raise Utf8Error if n < 1 c0 = s[i].ord # 1-byte, 7-bit sequence? if c0 < Utagx t.putc(c0) return 1 end raise Utf8Error if c0 < Utag2 # unexpected continuation byte? raise Utf8Error if n < 2 # need continuation byte c1 = s[i+1].ord raise Utf8Error if c1 < Utagx || Utag2 <= c1 # 2-byte, 11-bit sequence? if c0 < Utag3 raise Utf8Error if ((c0&Umask2)<<6 | (c1&Umaskx)) <= Uchar1max t.putc(c0) t.putc(c1) return 2 end # need second continuation byte raise Utf8Error if n < 3 c2 = s[i+2].ord raise Utf8Error if c2 < Utagx || Utag2 <= c2 # 3-byte, 16-bit sequence? if c0 < Utag4 u = (c0&Umask3)<<12 | (c1&Umaskx)<<6 | (c2&Umaskx) raise Utf8Error if u <= Uchar2max t.putc(c0) t.putc(c1) t.putc(c2) return 3 end # need third continuation byte raise Utf8Error if n < 4 c3 = s[i+3].ord raise Utf8Error if c3 < Utagx || Utag2 <= c3 # 4-byte, 21-bit sequence? if c0 < Utag5 u = (c0&Umask4)<<18 | (c1&Umaskx)<<12 | (c2&Umaskx)<<6 | (c3&Umaskx) raise Utf8Error if u <= Uchar3max t.putc(c0) t.putc(c1) t.putc(c2) t.putc(c3) return 4 end raise Utf8Error rescue Utf8Error t.write(Ustrerr) return 1 end def rubydoesenc? ::String.method_defined?(:force_encoding) end class Utf8Error < ::StandardError end class Error < ::StandardError end Utagx = 0b1000_0000 Utag2 = 0b1100_0000 Utag3 = 0b1110_0000 Utag4 = 0b1111_0000 Utag5 = 0b1111_1000 Umaskx = 0b0011_1111 Umask2 = 0b0001_1111 Umask3 = 0b0000_1111 Umask4 = 0b0000_0111 Uchar1max = (1<<7) - 1 Uchar2max = (1<<11) - 1 Uchar3max = (1<<16) - 1 Ucharerr = 0xFFFD # unicode "replacement char" Ustrerr = "\xef\xbf\xbd" # unicode "replacement char" Usurrself = 0x10000 Usurr1 = 0xd800 Usurr2 = 0xdc00 Usurr3 = 0xe000 Spc = ' '[0] Unesc = {?b=>?\b, ?f=>?\f, ?n=>?\n, ?r=>?\r, ?t=>?\t} end end