module CSVPP
  # Describes the layout of a delimited text file: which variables it
  # contains, where each one is positioned, of what type it is and, for
  # multiline formats, which variables belong to which kind of line.
  class Format
    attr_reader :name, :description, :skip, :col_sep

    # @param path [String, Format] path to format file; a Format instance is
    #   returned unchanged
    # @return [Format]
    def self.load(path)
      return path if path.is_a?(Format)

      load_from_str(File.read(path))
    end

    # @param json [String, Format] JSON definition of the format; a Format
    #   instance is returned unchanged
    # @return [Format]
    def self.load_from_str(json)
      return json if json.is_a?(Format)

      new(Oj.load(json))
    end

    # @param format [Hash] parsed format definition with string keys
    # @raise [KeyError] if 'vars' is missing, or if 'start' is missing for a
    #   multiline format
    def initialize(format)
      @name = format['name']
      @description = format['description']
      # Accepts both a real boolean and strings such as " True ".
      @multiline = format['multiline'].to_s.strip.downcase == 'true'
      @col_sep = format['column_separator']
      @skip = format['skip'].to_i
      @vars = format.fetch('vars')

      if multiline?
        @vars_grouped_by_line = group_var_names_by_line(@vars)
        @multiline_start = format.fetch('start')
      end

      # Cache for actual indices because formats provide 1-based human readable
      # positions. Only matters when parsing files with 30k+ line files. See
      # #index(var).
      @indices = {}
    end

    # @return [Array] names of all variables defined by this format
    def var_names
      vars.keys
    end

    # @return [Integer] number of variables defined by this format
    def length
      var_names.count
    end

    # @param var [String] name of the variable
    # @return [Integer] 0-based index derived from the 1-based position; the
    #   result is cached per variable
    def index(var)
      @indices[var] ||= position(var) - 1
    end

    # @param var [String] name of the variable
    # @return [Object] the 'position' entry of the variable as given in the
    #   format definition
    # @raise [KeyError] if the variable is not defined
    def position(var)
      vars.fetch(var)['position']
    end

    # @param var [String] name of the variable
    # @return [Object] the 'type' entry of the variable as given in the format
    #   definition
    # @raise [KeyError] if the variable is not defined
    def type(var)
      vars.fetch(var)['type']
    end

    # @param var [String] name of the variable for which the missings are
    #   required
    # @return [Array] an array of missing values (can be empty if no missings
    #   were defined)
    def missings(var)
      array_from(var, 'missings')
    end

    # Returns the values that are defined as `true` in the format's json
    # definition for the given variable.
    # @param var [String] name of the variable
    # @return [Array] all values that should be interpreted as `true` for this
    #   variable; empty unless the variable's type is "boolean"
    def true_values(var)
      return [] unless type(var) == 'boolean'

      array_from(var, 'true_values')
    end

    # Returns the values that are defined as `false` in the format's json
    # definition for the given variable.
    # @param var [String] name of the variable
    # @return [Array] all values that should be interpreted as `false` for this
    #   variable; empty unless the variable's type is "boolean"
    def false_values(var)
      return [] unless type(var) == 'boolean'

      array_from(var, 'false_values')
    end

    # The grouping is only built for multiline formats.
    # @param line_id [Object] value of a variable's 'line' entry
    # @return [Array] names of the variables defined for that line
    # @raise [KeyError] if no variable is defined for the given line
    def vars_for_line(line_id)
      vars_grouped_by_line.fetch(line_id)
    end

    # @param line_id [Object] line identifier to check
    # @return [Boolean] whether the given line id equals the format's 'start'
    #   entry
    def multiline_start?(line_id)
      multiline_start == line_id
    end

    # @return [Boolean] whether the format was declared as multiline
    def multiline?
      @multiline
    end

    # @return [String] the name padded to 30 characters, followed by the
    #   description
    def to_s
      # name is optional in the definition, so convert before padding instead
      # of failing on nil.
      "#{name.to_s.ljust(30)}\t| #{description}"
    end

    private

    attr_reader :vars, :vars_grouped_by_line, :multiline_start

    # Returns the value or values specified for the given attribute of the given
    # variable in the format's json. An empty array if no such attribute was
    # defined for the given variable.
    # @return [Array] value(s) defined for given attribute for given variable
    def array_from(var, attribute)
      value = vars.fetch(var)[attribute]
      return [] if value.nil?
      return value if value.is_a?(Array)

      [value]
    end

    # Maps every 'line' value to the names of the variables declared for it,
    # keeping the order in which the variables were defined.
    # @param definitions [Hash] variable name => variable definition
    # @return [Hash] line id => array of variable names
    def group_var_names_by_line(definitions)
      definitions.each_with_object({}) do |(var_name, meta), grouped|
        (grouped[meta['line']] ||= []) << var_name
      end
    end
  end
end