module Nodepile
  # Class makes an array of values behave like a hash. Intended to be used
  # for rendering records from a tabular data source.
  #
  # Keys are kept in an ordered array (think column headers) and need not be
  # unique; hash-style lookups always resolve to the leftmost matching key.
  # A record may also carry metadata, which is stored separately from the
  # regular key/value data.
  class KeyedArrayAccessor
    include Enumerable

    attr_accessor :extensible
    attr_accessor :source, :ref_num

    # Note that this method will always freeze and retain a reference to the
    # keys_array that is passed in. It also retains a reference to the
    # values_array passed in (allowing later mutation), so pass a copy if you
    # need protection from side effects.
    #
    # @param keys_array [Array<String,nil>] The keys in order (think column header
    #         names). These need not be unique (although non-unique keys
    #         affect many methods).
    # @param values_array [Array,nil] Array of values. Should be the same size
    #         as the keys array and provides the value of each corresponding
    #         key. If passed nil, creates an array composed entirely of nils.
    # @param extensible [Boolean] indicates whether adding keys is permitted
    # @param source [String,nil,Object] If provided on creation, will be returned by
    #         the #source method. Typically indicates the file or other
    #         source of the record.
    # @param ref_num [Integer,nil] If provided on creation, will be returned by the
    #         #ref_num method. Typically indicates the relative position of
    #         the record within a given source.
    # @param metadata [#each,nil] Read-only data that will be retained with this
    #         record. It is extracted from the object passed in using #each
    #         (which works for a hash or an array of two element arrays).
    #         NOTE: A metadata key can hide the value of standard keys unless
    #         metadata_key_prefix is set to nil. If a key prefix has been
    #         provided but a metadata key does not start with it, the prefix
    #         is prepended to that key. Except where explicitly noted,
    #         methods do not alter, access, or consider metadata values.
    # @param metadata_key_prefix [String,nil] If nil, metadata values cannot be
    #         retrieved via the square bracket operator #[]. If non-nil,
    #         keys passed to #[] are first tested to see if they have the
    #         prefix. See #[] for details.
    # @raise [RuntimeError] if any key is neither a String nor nil
    def initialize(keys_array, values_array, extensible: true, source: nil, ref_num: nil,
                   metadata: nil, metadata_key_prefix: '')
      raise "keys must all be of type String or nil" unless keys_array.all? { |k| k.nil? || k.is_a?(String) }

      @keys = keys_array.freeze
      @vals = values_array || Array.new(@keys.length)
      @extensible = extensible
      @source = source
      @ref_num = ref_num
      # Always initialise the metadata store and prefix so that the metadata
      # accessors work even when no metadata was supplied.
      reset_metadata(metadata, metadata_key_prefix: metadata_key_prefix)
    end

    # Copy self, including metadata, source, and ref_num. The values array is
    # copied; the (frozen) keys array is shared.
    # @return [KeyedArrayAccessor]
    def dup
      self.class.new(@keys, @vals.dup,
                     extensible: @extensible, metadata: @meta, source: @source,
                     ref_num: @ref_num, metadata_key_prefix: @meta_key_prefix)
    end

    # Dump any existing metadata and replace it with the provided metadata.
    # Keys lacking the metadata key prefix get the prefix prepended.
    # @param pair_enumerable [#each,nil] Enumerable that should return key,value pairs
    # @param metadata_key_prefix [String,nil,Symbol] new prefix, or
    #         :leave_prefix_unchanged to keep the current one
    # @return [void]
    def reset_metadata(pair_enumerable, metadata_key_prefix: :leave_prefix_unchanged)
      @meta_key_prefix = metadata_key_prefix unless metadata_key_prefix == :leave_prefix_unchanged
      @meta = {}
      pair_enumerable&.each { |(k, v)| @meta[_prefixed_meta_key(k)] = v }
      nil
    end

    # Add or replace a single metadata entry.
    # @param key [String] If the value does not start with the metadata key prefix,
    #         the prefix will be prepended
    # @param value [Object,nil]
    # @return [Object,nil] the value stored
    def update_metadata(key, value)
      @meta ||= {}
      @meta[_prefixed_meta_key(key)] = value
    end

    # Retrieve a metadata value.
    # @param key [String] Note that the key passed in should start with the
    #         metadata_key_prefix if one has been specified.
    # @return [Object,nil] nil when no such metadata key exists
    def metadata(key) = @meta[key]

    # @return [Boolean] true if a metadata entry exists under exactly this key
    def metadata_include?(key) = @meta.include?(key)

    # @return [String,nil] the prefix applied to metadata keys
    def metadata_key_prefix = @meta_key_prefix

    # Return a copy of self where all values have been cleared.
    # Metadata, source, ref_num and the extensible flag are not carried over;
    # the copy is built with the constructor defaults.
    # @return [KeyedArrayAccessor]
    def cleared = self.class.new(@keys, Array.new(@keys.length))

    # Replaces every value that is a pure-whitespace (or empty) string with
    # nil, in place. Note that nils get special treatment in operations
    # like #overlay()
    # @return [self]
    def clear_blanks
      # \A..\z anchors the whole string; ^..$ would match any blank line
      # inside a multi-line value.
      @vals.map! { |v| v.is_a?(String) && /\A\s*\z/.match?(v) ? nil : v }
      self
    end

    # @param key_names [Array<String>] keys (possibly nested in arrays) to test
    # @return [Boolean] Returns true if the value for each provided key is nil
    def clear?(*key_names)
      key_names.flatten.all? { |k| self[k].nil? }
    end

    # Note that if the same key name is duplicated multiple times, the leftmost
    # value is used.
    # @return [Hash]
    def to_h
      h = {}
      # walk right-to-left so that, for duplicate keys, the leftmost value
      # is written last and therefore dominates
      (@keys.length - 1).downto(0) { |i| h[@keys[i]] = @vals[i] }
      h
    end

    # @return [Array<String,nil>] the (frozen) internal keys array
    def keys = @keys

    # @yield [key]
    def each_key(&block)
      return enum_for(:each_key) unless block_given?

      @keys.each(&block)
    end

    # Similar to a Hash's #map! function: each value is replaced by the
    # block's result for its (key, value) pair.
    # @return [void]
    def kv_map!
      raise "Block required" unless block_given?

      @keys.each_with_index { |k, i| @vals[i] = yield(k, @vals[i]) }
      nil
    end

    # @yield [key,value]
    def each
      return enum_for(:each) unless block_given?

      @keys.each_with_index { |k, i| yield(k, @vals[i]) }
    end

    # @yield [value]
    def each_value(&block)
      return enum_for(:each_value) unless block_given?

      @vals.each(&block)
    end

    # An empty key is a key whose value is nil.
    # @param yield_index [Boolean] If true, yields the internal storage index number
    #         rather than the key name. Note that internal index number is
    #         the same for objects that #conforms?()
    # @return [void,Enumerator]
    def each_empty_key(yield_index = false)
      return enum_for(:each_empty_key, yield_index) unless block_given?

      @keys.each_with_index { |k, i| yield(yield_index ? i : k) if @vals[i].nil? }
      nil
    end

    # A filled key is a key whose value is not nil. The block is yielded with
    # the key and value (or index and value depending on the parameter).
    # @param yield_index_instead_of_val [Boolean] If true, yields the internal
    #         storage index number in place of the key name. Note that
    #         internal index number is the same for objects that #conforms?()
    # @return [void,Enumerator]
    def each_filled_pair(yield_index_instead_of_val = false)
      return enum_for(:each_filled_pair, yield_index_instead_of_val) unless block_given?

      @keys.each_with_index do |k, i|
        yield((yield_index_instead_of_val ? i : k), @vals[i]) unless @vals[i].nil?
      end
      nil
    end

    # @return [Array] a copy of the values in key order
    def values = @vals.dup

    # alias for #values
    def to_a = values

    # Note that duplications of the same key are counted toward this number
    def size = keys.length

    def length = size

    # Equality comparison is very tolerant and may not be what you expect.
    # * Hashes are deemed equal if they have the same unique keys
    #   and the key-value pairs retrieved via #[] are equal.
    # * Arrays are deemed equal if the to_a() representation of self matches
    #   the other array.
    # * Another KeyedArrayAccessor is deemed equal using one of two rules.
    #   For #conforms? true objects, the values are compared positionally.
    #   For #conforms? false objects, the set of distinct keys must be the
    #   same and the value associated with each key using #[] must be the
    #   same (which means only the first of duplicate columns is compared).
    #
    # Note: Metadata is not considered for purposes of this comparison.
    # @return [Boolean]
    def ==(otr)
      return true if equal?(otr)

      case otr
      when Hash
        _same_key_set?(otr.keys) && otr.all? { |k, v| self[k] == v }
      when Array
        @vals == otr
      when KeyedArrayAccessor
        if conforms?(otr)
          @vals == otr._internal_vals
        else
          _same_key_set?(otr._internal_keys) && @keys.all? { |k| self[k] == otr[k] }
        end
      else
        false # currently no other types are supported
      end
    end

    # @return [Object,nil] the value stored at the given internal position
    def value_at(index) = @vals[index]

    # @return [Boolean] true if key is one of the regular (non-metadata) keys
    def include?(key) = @keys.include?(key)

    # Copies every key/value of the other object into self.
    # Other object must support #each_key and #[].
    # Note: metadata is left unchanged by this method.
    # @return [self]
    def merge!(otr_hashlike)
      raise "Block handling not yet supported by this method" if block_given?

      otr_hashlike.each_key { |k| self[k] = otr_hashlike[k] }
      self
    end

    # Non-mutating form of #merge!
    def merge(otr_hashlike) = dup.merge!(otr_hashlike)

    # Provides hash-style access to a value by its key (rather than its position).
    # Note that if duplicate keys exist, the leftmost key is returned.
    # Returns the value of the key or quietly returns nil if the key isn't found
    #
    # Note, if the metadata_key_prefix is not nil, this method will attempt to
    # retrieve metadata matching the key before retrieving the normal key
    # data. If the metadata does not contain the requested key, this will
    # check for a match of the normal data.
    def [](key)
      # nil (and other non-string-like) keys can never name metadata
      if @meta_key_prefix && key.respond_to?(:start_with?) &&
         key.start_with?(@meta_key_prefix) && @meta&.include?(key)
        return @meta[key]
      end

      ix = @keys.index(key)
      ix.nil? ? nil : @vals[ix]
    end

    # Uses a hash-style access to update values. Note that because this data
    # structure does not enforce uniqueness of keys, this method will only update
    # the leftmost value corresponding to the given key.
    #
    # Important note. Adding a new key to the object will make it non-conforming
    # with other objects.
    # @raise [RuntimeError] if the key is new and #extensible is false
    def []=(key, new_val)
      ix = @keys.index(key)
      if ix
        @vals[ix] = new_val # simple case... update existing value
      else
        raise <<~ERRMSG unless extensible
          Because the #extensible() attribute is set to false, a new key may not be added [#{key}]
        ERRMSG
        # new keys are appended to the right side; a fresh array is built
        # because the existing keys array is frozen (and possibly shared)
        @keys = (@keys.dup << key).freeze
        @vals << new_val
      end
      new_val
    end

    # Indicates that the object has exactly the same keys in exactly the same order
    # @return [Boolean]
    def conforms?(otr)
      otr.is_a?(self.class) && @keys == otr._internal_keys
    end

    # Given a KeyedArrayAccessor object, update self to form a "merged"
    # KeyedArrayAccessor where self "underlays" an "upper" array.
    # An overlay/underlay follows these rules:
    # If "upper" and self have non-blank for the same element, then the upper
    # element "overlays" the corresponding entry in self. If the upper is
    # blank (nil), then it does not "overlay".
    # Random Observation: If the upper_kaa is completely populated, the
    # lower_kaa is essentially ignored.
    #
    # NOTE: When the upper_kaa does not #conforms?(), the result
    # will have a key set containing the union of the upper and lower keysets.
    # Also, overlaying non-conforming objects will have worse performance.
    #
    # @param upper_kaa [KeyedArrayAccessor] Non-blank entries here will "overlay"
    #         entries of self. This method is a no-op if it is passed self.
    # @return [self]
    #
    # Note: Metadata for self is unchanged by this method.
    def underlay!(upper_kaa)
      return self if equal?(upper_kaa) # no-op

      if conforms?(upper_kaa)
        # identical key layout, so positions can be copied directly
        upper_kaa._each_value_with_index(true) { |upper_val, ix| @vals[ix] = upper_val }
      else
        upper_kaa.each_filled_pair(false) { |key, upper_val| self[key] = upper_val }
      end
      self
    end

    # See #overlay!() except this creates a copy rather than altering self.
    def overlay(lower_kaa) = lower_kaa.underlay(self)

    # See #underlay!() except this creates a copy rather than altering self.
    def underlay(upper_kaa) = dup.underlay!(upper_kaa)

    # See #underlay!()
    # An important difference between overlay and underlay is the ordering
    # of columns in the result. Column order is in the order of the lower
    # object plus any (non-nil) additions appearing to the right. This operation
    # is not particularly efficient except when working with conforming arrays.
    #
    # @param lower_kaa [KeyedArrayAccessor] supplies values for entries of
    #         self that are nil
    # @return [self]
    #
    # Note: metadata is unchanged by this method.
    def overlay!(lower_kaa)
      return self if equal?(lower_kaa) # no-op

      if conforms?(lower_kaa)
        # only nil counts as blank; a stored false must not be overwritten
        lower_kaa._each_value_with_index(true) { |lower_val, ix| @vals[ix] = lower_val if @vals[ix].nil? }
      else
        merged = lower_kaa.dup.underlay!(self)
        @keys = merged._internal_keys
        @vals = merged._internal_vals
      end
      self
    end

    # Repeatedly overlays successive KeyedArrayAccessor with the first
    # one being at the bottom and the last one being at the top.
    # @return [KeyedArrayAccessor,nil] returns nil if the inbound enumerable was empty
    def self.bulk_overlay(kaa_enumerable)
      kaa_enumerable.inject(nil) { |accum, kaa| (accum || kaa.dup).underlay!(kaa) }
    end

    # Internal read-only view used to support pattern matching.
    # Does two main things:
    # 1) Converts keys passed in to string type (allowing symbols to be used
    #    for pattern matching)
    # 2) Makes it look like metadata and data occupy the same keyspace
    #    regardless of the setting of the metadata key prefix
    class PseudoROHashForDeconstruct
      def initialize(keyed_array)
        @kaa = keyed_array
      end

      # @return [Boolean] true if k names either a regular key or a metadata key
      def include?(k) = @kaa.include?(k.to_s) || @kaa.metadata_include?(k.to_s)

      # Looks up k among the regular keys, falling back to metadata when the
      # record's #[] operator would not expose metadata itself (nil prefix).
      def [](k)
        name = k.to_s
        return @kaa[name] unless @kaa.metadata_key_prefix.nil?

        @kaa.include?(name) ? @kaa[name] : @kaa.metadata(name)
      end
    end

    # Supports `case record in {name:, ...}` pattern matching.
    # Note that deconstruct_keys will expose metadata values regardless of the
    # choice of metadata key prefix.
    # @param keys [Array<Symbol>,nil] keys requested by the pattern, or nil
    #         to request everything
    # @return [Hash{Symbol=>Object}] a real Hash (pattern matching rejects
    #         any other return type) holding the requested keys that exist
    def deconstruct_keys(keys)
      view = PseudoROHashForDeconstruct.new(self)
      # nil keys cannot be converted to symbols, so they are left out
      wanted = keys || (@keys.compact + @meta.keys).uniq
      wanted.each_with_object({}) do |k, h|
        h[k.to_sym] = view[k] if view.include?(k)
      end
    end

    protected

    def _same_keys?(keys2) = @keys.equal?(keys2) || @keys == keys2

    def _set_val_at(index, newval) = (@vals[index] = newval)

    def _internal_keys = @keys

    def _internal_vals = @vals

    # True when other_keys and the keys of self contain the same distinct
    # keys (order and duplicates ignored).
    def _same_key_set?(other_keys)
      ((@keys + other_keys) - (@keys & other_keys)).empty?
    end

    # Returns the metadata key with the metadata key prefix applied, unless
    # there is no prefix or the key already carries it.
    def _prefixed_meta_key(key)
      return key if @meta_key_prefix.nil? || key.start_with?(@meta_key_prefix)

      @meta_key_prefix + key
    end

    # Beware, the indices returned are only meaningful for conforming arrays
    # @param suppress_nils [Boolean] when true, nil values are skipped
    # @yield [value,index]
    def _each_value_with_index(suppress_nils = false)
      return enum_for(:_each_value_with_index, suppress_nils) unless block_given?

      @keys.each_with_index do |_k, i|
        yield(@vals[i], i) unless suppress_nils && @vals[i].nil?
      end
    end
  end # class KeyedArrayAccessor
end # module Nodepile