lib/csv_decision/dictionary.rb in csv_decision-0.1.0 vs lib/csv_decision/dictionary.rb in csv_decision-0.2.0
- old
+ new
@@ -6,32 +6,91 @@
# See LICENSE and README.md for details.
module CSVDecision
# Parse the CSV file's header row. These methods are only required at table load time.
# @api private
module Dictionary
- # Table used to build a column dictionary entry.
- ENTRY = {
- in: { type: :in, eval: nil },
- 'in/text': { type: :in, eval: false },
- out: { type: :out, eval: nil },
- 'out/text': { type: :out, eval: false },
- guard: { type: :guard, eval: true },
- if: { type: :if, eval: true }
- }.freeze
- private_constant :ENTRY
+ # Column dictionary entries.
+ class Entry
+ # Table used to build a column dictionary entry.
+ ENTRY = {
+ in: { type: :in, eval: nil },
+ 'in/text': { type: :in, eval: false },
+ set: { type: :set, eval: nil, set_if: true },
+ 'set/nil?': { type: :set, eval: nil, set_if: :nil? },
+ 'set/blank?': { type: :set, eval: nil, set_if: :blank? },
+ out: { type: :out, eval: nil },
+ 'out/text': { type: :out, eval: false },
+ guard: { type: :guard, eval: true },
+ if: { type: :if, eval: true }
+ }.freeze
+ private_constant :ENTRY
- # Value object to hold column dictionary entries.
- Entry = Struct.new(:name, :eval, :type) do
+ # Input column types.
+ INS_TYPES = %i[in guard set].freeze
+ private_constant :INS_TYPES
+
+ # Create a new column dictionary entry, defaulting its attributes from the column type,
+ # which is looked up in the +ENTRY+ table.
+ #
+ # @param name [Symbol] Column name.
+ # @param type [Symbol] Column type.
+ # @return [Entry] Column dictionary entry.
+ def self.create(name:, type:)
+ entry = ENTRY[type]
+ new(name: name, eval: entry[:eval], type: entry[:type], set_if: entry[:set_if])
+ end
+
+ # @return [Boolean] Return true if this is an input column, false otherwise.
def ins?
- %i[in guard].member?(type) ? true : false
+ @ins
end
- end
- # These column types do not need a name.
- COLUMN_TYPE_ANONYMOUS = Set.new(%i[guard if]).freeze
- private_constant :COLUMN_TYPE_ANONYMOUS
+ # @return [Symbol] Column name.
+ attr_reader :name
+ # @return [Symbol] Column type.
+ attr_reader :type
+
+ # @return [nil, Boolean] If set to true then this column has procs that
+ # need evaluating, otherwise it only contains constants.
+ attr_accessor :eval
+
+ # @return [nil, true, Symbol] Defined for columns of type :set, nil otherwise.
+ # If true, then the default is set unconditionally; otherwise it is the method symbol
+ # sent to the input hash value, which must evaluate to a truthy value.
+ attr_reader :set_if
+
+ # @return [Matchers::Proc, Object] For a column of type set: gives the proc that must be
+ # evaluated to set the default value. If not a proc, then it's some type of constant.
+ attr_accessor :function
+
+ # @param name (see #name)
+ # @param type (see #type)
+ # @param eval (see #eval)
+ # @param set_if (see #set_if)
+ def initialize(name:, type:, eval: nil, set_if: nil)
+ @name = name
+ @type = type
+ @eval = eval
+ @set_if = set_if
+ @function = nil
+ @ins = INS_TYPES.member?(type)
+ end
+
+ # Convert the object's attributes to a hash.
+ #
+ # @return [Hash{Symbol=>[nil, Boolean, Symbol]}]
+ def to_h
+ {
+ name: @name,
+ type: @type,
+ eval: @eval,
+ set_if: @set_if
+ }
+ end
+ end
+
# Classify and build a dictionary of all input and output columns by
# parsing the header row.
#
# @param header [Array<String>] The header row after removing any empty columns.
# @return [Hash<Hash>] Column dictionary is a hash of hashes.
@@ -46,129 +105,62 @@
# Add a new symbol to the dictionary of named input and output columns.
#
# @param columns [Hash{Symbol=>[:in, Integer]}] Hash of column names, with values :in or the output column index.
# @param name [Symbol] Symbolized column name.
# @param out [false, Integer] False if an input column, otherwise the index of the output column.
- # @return [{Symbol=>Symbol}] Column dictionary updated with the new name.
+ # @return [Hash{Symbol=>[:in, Integer]}] Column dictionary updated with the new name.
def self.add_name(columns:, name:, out: false)
- validate_name(columns: columns, name: name, out: out)
+ Validate.name(columns: columns, name: name, out: out)
columns[name] = out ? out : :in
columns
end
- def self.validate_column(cell:, index:)
- match = Header::COLUMN_TYPE.match(cell)
- raise CellValidationError, 'column name is not well formed' unless match
-
- column_type = match['type']&.downcase&.to_sym
- column_name = column_name(type: column_type, name: match['name'], index: index)
-
- [column_type, column_name]
- rescue CellValidationError => exp
- raise CellValidationError, "header column '#{cell}' is not valid as the #{exp.message}"
- end
- private_class_method :validate_column
-
- def self.column_name(type:, name:, index:)
- # if: columns are named after their index, which is an integer and so cannot
- # clash with other column name types, which are symbols.
- return index if type == :if
-
- return format_column_name(name) if name.present?
-
- return if COLUMN_TYPE_ANONYMOUS.member?(type)
- raise CellValidationError, 'column name is missing'
- end
- private_class_method :column_name
-
- def self.format_column_name(name)
- column_name = name.strip.tr("\s", '_')
-
- return column_name.to_sym if Header::COLUMN_NAME_RE.match(column_name)
- raise CellValidationError, "column name '#{name}' contains invalid characters"
- end
- private_class_method :format_column_name
-
- # Returns the normalized column type, along with an indication if
- # the column requires evaluation
- def self.column_type(column_name, entry)
- Entry.new(column_name, entry[:eval], entry[:type])
- end
- private_class_method :column_type
-
def self.parse_cell(cell:, index:, dictionary:)
- column_type, column_name = validate_column(cell: cell, index: index)
+ column_type, column_name = Validate.column(cell: cell, index: index)
- entry = column_type(column_name, ENTRY[column_type])
-
- dictionary_entry(dictionary: dictionary, entry: entry, index: index)
+ dictionary_entry(dictionary: dictionary,
+ entry: Entry.create(name: column_name, type: column_type),
+ index: index)
end
private_class_method :parse_cell
def self.dictionary_entry(dictionary:, entry:, index:)
case entry.type
- # Header column that has a function for setting the value (planned feature)
- # when :set, :'set/nil?', :'set/blank?'
- # # Default function will set the input value unconditionally or conditionally
- # dictionary.defaults[index] =
- # Columns::Default.new(entry.name, nil, default_if(type))
- #
- # # Treat set: as an in: column
- # dictionary.ins[index] = entry
-
- when :in
- add_name(columns: dictionary.columns, name: entry.name)
- dictionary.ins[index] = entry
-
# A guard column is still added to the ins hash for parsing as an input column.
- when :guard
- dictionary.ins[index] = entry
+ when :in, :guard, :set
+ input_entry(dictionary: dictionary, entry: entry, index: index)
- when :out
- add_name(columns: dictionary.columns, name: entry.name, out: index)
- dictionary.outs[index] = entry
-
- # Add an if: column to both the +outs+ hash for output column parsing, and also
- # a specialized +ifs+ hash used for evaluating them for row filtering.
- when :if
- dictionary.outs[index] = entry
- dictionary.ifs[index] = entry
+ when :out, :if
+ output_entry(dictionary: dictionary, entry: entry, index: index)
end
dictionary
end
private_class_method :dictionary_entry
- def self.validate_name(columns:, name:, out:)
- return unless (in_out = columns[name])
+ def self.output_entry(dictionary:, entry:, index:)
+ case entry.type
+ # if: columns are anonymous
+ when :if
+ dictionary.ifs[index] = entry
- return validate_out_name(in_out: in_out, name: name) if out
- validate_in_name(in_out: in_out, name: name)
- end
- private_class_method :validate_name
-
- def self.validate_out_name(in_out:, name:)
- if in_out == :in
- raise CellValidationError, "output column name '#{name}' is also an input column"
+ when :out
+ add_name(columns: dictionary.columns, name: entry.name, out: index)
end
- raise CellValidationError, "output column name '#{name}' is duplicated"
+ dictionary.outs[index] = entry
end
- private_class_method :validate_out_name
+ private_class_method :output_entry
- def self.validate_in_name(in_out:, name:)
- # in: columns may be duped
- return if in_out == :in
+ def self.input_entry(dictionary:, entry:, index:)
+ dictionary.ins[index] = entry
- raise CellValidationError, "output column name '#{name}' is also an input column"
- end
- private_class_method :validate_in_name
+ # Default function will set the input value unconditionally or conditionally.
+ dictionary.defaults[index] = entry if entry.type == :set
- # def self.default_if(type)
- # return nil if type == :set
- # return :nil? if type == :'set/nil'
- # :blank?
- # end
- # private_class_method :default_if
+ # guard: columns are anonymous
+ add_name(columns: dictionary.columns, name: entry.name) unless entry.type == :guard
+ end
+ private_class_method :input_entry
end
end
\ No newline at end of file