# The pg_array extension adds support for Sequel to handle # PostgreSQL's string and numeric array types. It supports both # single and multi-dimensional arrays. For integer and # float arrays, it uses a JSON-based parser which is usually written in C # and should be fairly fast. For string and decimal arrays, it uses # a hand coded parser written in ruby that is unoptimized and probably # slow. # # This extension integrates with Sequel's native postgres adapter, so # that when array fields are retrieved, they are parsed and returned # as instances of Sequel::Postgres::PGArray subclasses. PGArray is # a DelegateClass of Array, so it mostly acts like an array, but not # completely (is_a?(Array) is false). If you want the actual array, # you can call PGArray#to_a. This is done so that Sequel does not # treat a PGArray like an Array by default, which would cause issues. # # In addition to the parsers, this extension comes with literalizers # for PGArray using the standard Sequel literalization callbacks, so # they work with on all adapters. # # To turn an existing Array into a PGArray: # # array.pg_array # # You can also provide a type, though it many cases it isn't necessary: # # array.pg_array(:varchar) # or :int4, :"double precision", etc. # # So if you want to insert an array into an int4[] database column: # # DB[:table].insert(:column=>[1, 2, 3].pg_array) # # If you would like to use PostgreSQL arrays in your model objects, you # probably want to modify the schema parsing/typecasting so that it # recognizes and correctly handles the arrays, which you can do by: # # DB.extend Sequel::Postgres::PGArray::DatabaseMethods # # If you are not using the native postgres adapter, you probably # also want to use the typecast_on_load plugin in the model, and # set it to typecast the array column(s) on load. # # If you want an easy way to call PostgreSQL array functions and # operators, look into the pg_array_ops extension. # # This extension requires both the json and delegate libraries. # # == Additional License # # PGArray::Parser code was translated from Javascript code in the # node-postgres project and has the following additional license: # # Copyright (c) 2010 Brian Carlson (brian.m.carlson@gmail.com) # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject # to the following conditions: # # The above copyright notice and this permission notice shall be included # in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. require 'delegate' require 'json' module Sequel module Postgres # Base class for the PostgreSQL array types. Subclasses generally # just deal with parsing, so instances manually created from arrays # can use this class correctly. class PGArray < DelegateClass(Array) ARRAY = "ARRAY".freeze DOUBLE_COLON = '::'.freeze EMPTY_BRACKET = '[]'.freeze OPEN_BRACKET = '['.freeze CLOSE_BRACKET = ']'.freeze COMMA = ','.freeze BACKSLASH = '\\'.freeze EMPTY_STRING = ''.freeze OPEN_BRACE = '{'.freeze CLOSE_BRACE = '}'.freeze NULL = 'NULL'.freeze QUOTE = '"'.freeze module DatabaseMethods ESCAPE_RE = /("|\\)/.freeze ESCAPE_REPLACEMENT = '\\\\\1'.freeze # Reset the conversion procs when extending the Database object, so # it will pick up the array convertors. This is only done for the native # postgres adapter. def self.extended(db) db.reset_conversion_procs if db.respond_to?(:reset_conversion_procs) end # Handle arrays in bound variables def bound_variable_arg(arg, conn) case arg when PGArray bound_variable_array(arg.to_a) when Array bound_variable_array(arg) else super end end # Make the column type detection deal with string and numeric array types. def schema_column_type(db_type) case db_type when /\A(character( varying)?|text).*\[\]\z/io :string_array when /\A(integer|bigint|smallint)\[\]\z/io :integer_array when /\A(real|double precision)\[\]\z/io :float_array when /\Anumeric.*\[\]\z/io :decimal_array else super end end private # Format arrays used in bound variables. def bound_variable_array(a) case a when Array "{#{a.map{|i| bound_variable_array(i)}.join(COMMA)}}" when String "\"#{a.gsub(ESCAPE_RE, ESCAPE_REPLACEMENT)}\"" else literal(a) end end # Given a value to typecast and the type of PGArray subclass: # * If given a PGArray, just return the value (even if different subclass) # * If given an Array, create a new instance of the subclass # * If given a String, call the parser for the subclass with it. def typecast_value_pg_array(value, klass) case value when PGArray value when Array klass.new(value) when String klass.parse(value) else raise Sequel::InvalidValue, "invalid value for #{klass}: #{value.inspect}" end end # Create typecast methods for the supported array types that # delegate to typecast_value_pg_array with the related class. %w'string integer float decimal'.each do |t| class_eval("def typecast_value_#{t}_array(v) typecast_value_pg_array(v, PG#{t.capitalize}Array) end", __FILE__, __LINE__) end end # PostgreSQL array parser that handles both text and numeric # input. Because PostgreSQL arrays can contain objects that # can be literalized in any number of ways, it is not possible # to make a fully generic parser. # # This parser is very simple and unoptimized, but should still # be O(n) where n is the length of the input string. class Parser attr_reader :pos # Set the source for the input, and any converter callable # to call with objects to be created. For nested parsers # the source may contain text after the end current parse, # which will be ignored. def initialize(source, converter=nil) @source = source @source_length = source.length @converter = converter @pos = -1 @entries = [] @recorded = "" @dimension = 0 end # Return 2 objects, whether the next character in the input # was escaped with a backslash, and what the next character is. def next_char @pos += 1 if (c = @source[@pos..@pos]) == BACKSLASH @pos += 1 [true, @source[@pos..@pos]] else [false, c] end end # Add a new character to the buffer of recorded characters. def record(c) @recorded << c end # Take the buffer of recorded characters and add it to the array # of entries, and use a new buffer for recorded characters. def new_entry(include_empty=false) if !@recorded.empty? || include_empty entry = @recorded if entry == NULL && !include_empty entry = nil elsif @converter entry = @converter.call(entry) end @entries.push(entry) @recorded = "" end end # Parse the input character by character, returning an array # of parsed (and potentially converted) objects. def parse(nested=false) # quote sets whether we are inside of a quoted string. quote = false until @pos >= @source_length escaped, char = next_char if char == OPEN_BRACE && !quote @dimension += 1 if (@dimension > 1) # Multi-dimensional array encounter, use a subparser # to parse the next level down. subparser = self.class.new(@source[@pos..-1], @converter) @entries.push(subparser.parse(true)) @pos += subparser.pos - 1 end elsif char == CLOSE_BRACE && !quote @dimension -= 1 if (@dimension == 0) new_entry # Exit early if inside a subparser, since the # text after parsing the current level should be # ignored as it is handled by the parent parser. return @entries if nested end elsif char == QUOTE && !escaped # If already inside the quoted string, this is the # ending quote, so add the entry. Otherwise, this # is the opening quote, so set the quote flag. new_entry(true) if quote quote = !quote elsif char == COMMA && !quote # If not inside a string and a comma occurs, it indicates # the end of the entry, so add the entry. new_entry else # Add the character to the recorded character buffer. record(char) end end raise Sequel::Error, "array dimensions not balanced" unless @dimension == 0 @entries end end # Parse the string using the generalized parser, setting the type # if given. def self.parse(string, type=nil) new(Parser.new(string, method(:convert_item)).parse, type) end # Return the item as-is by default, making conversion a no-op. def self.convert_item(s) s end private_class_method :convert_item # The type of this array. May be nil if no type was given. If a type # is provided, the array is automatically casted to this type when # literalizing. This type is the underlying type, not the array type # itself, so for an int4[] database type, it should be :int4 or 'int4' attr_accessor :array_type # Set the array to delegate to, and a database type. def initialize(array, type=nil) super(array) self.array_type = type end # The delegated object is always an array. alias to_a __getobj__ # Append the array SQL to the given sql string. # If the receiver has a type, add a cast to the # database array type. def sql_literal_append(ds, sql) sql << ARRAY _literal_append(sql, ds, to_a) if at = array_type sql << DOUBLE_COLON << at.to_s << EMPTY_BRACKET end end private # Recursive method that handles multi-dimensional # arrays, surrounding each with [] and interspersing # entries with ,. def _literal_append(sql, ds, array) sql << OPEN_BRACKET comma = false commas = COMMA array.each do |i| sql << commas if comma if i.is_a?(Array) _literal_append(sql, ds, i) else ds.literal_append(sql, i) end comma = true end sql << CLOSE_BRACKET end end # PGArray subclass handling integer and float types, using a fast JSON # parser. Does not handle numeric/decimal types, since JSON does deal # with arbitrary precision (see PGDecimalArray for that). class PGNumericArray < PGArray # Character conversion map mapping input strings to JSON replacements SUBST = {'{'.freeze=>'['.freeze, '}'.freeze=>']'.freeze, 'NULL'.freeze=>'null'.freeze} # Regular expression matching input strings to convert SUBST_RE = %r[\{|\}|NULL].freeze # Parse the input string by using a gsub to convert non-JSON characters to # JSON, running it through a regular JSON parser, and the doing a recursive # map over the output to make sure the entries are in the correct type (mostly, # to make sure real/double precision database types are returned as float and # not integer). def self.parse(string, type=nil) new(recursive_map(JSON.parse(string.gsub(SUBST_RE){|m| SUBST[m]})), type) end # Convert each item in the array to the correct type, handling multi-dimensional # arrays. def self.recursive_map(array) array.map do |i| if i.is_a?(Array) recursive_map(i) elsif i convert_item(i) end end end private_class_method :recursive_map end # PGArray subclass for decimal/numeric types. Uses the general # parser as the JSON parser cannot handle arbitrary precision numbers. class PGDecimalArray < PGArray # Convert the item to a BigDecimal. def self.convert_item(s) BigDecimal.new(s.to_s) end private_class_method :convert_item ARRAY_TYPE = 'decimal'.freeze # Use the decimal type by default. def array_type super || ARRAY_TYPE end end # PGArray subclass for handling real/double precision arrays. class PGFloatArray < PGNumericArray # Convert the item to a float. def self.convert_item(s) s.to_f end private_class_method :convert_item ARRAY_TYPE = 'double precision'.freeze # Use the double precision type by default. def array_type super || ARRAY_TYPE end end # PGArray subclass for handling int2/int4/int8 arrays. class PGIntegerArray < PGNumericArray ARRAY_TYPE = 'int4'.freeze # Use the int4 type by default. def array_type super || ARRAY_TYPE end end # PGArray subclass for handling char/varchar/text arrays. class PGStringArray < PGArray CHAR = 'char'.freeze VARCHAR = 'varchar'.freeze TEXT = 'text'.freeze # By default, use a text array. If char is given without # a size, use varchar instead, as otherwise Postgres assumes # length of 1, which is likely to cause data loss. def array_type case (c = super) when nil TEXT when CHAR, :char VARCHAR else c end end end PG_TYPES = {} unless defined?(PG_TYPES) # Automatically convert the built-in numeric and text array # types. to PGArray instances on retrieval if the native # postgres adapter is used. [ [1005, PGIntegerArray, 'int2'.freeze], [1007, PGIntegerArray, 'int4'.freeze], [1016, PGIntegerArray, 'int8'.freeze], [1021, PGFloatArray, 'real'.freeze], [1022, PGFloatArray, 'double precision'.freeze], [1231, PGDecimalArray, 'numeric'.freeze], [1009, PGStringArray, 'text'.freeze], [1014, PGStringArray, 'char'.freeze], [1015, PGStringArray, 'varchar'.freeze] ].each do |ftype, klass, type| meth = klass.method(:parse) PG_TYPES[ftype] = lambda{|s| meth.call(s, type)} end end end class Array # Return a PGArray proxy to the receiver, using a # specific database type if given. This is mostly useful # as a short cut for creating PGArray objects that didn't # come from the database. def pg_array(type=nil) Sequel::Postgres::PGArray.new(self, type) end end