RubygemsResearch

Sha256: 3cd11bf31a7603ea9d709d942e736045ea9185678ccbd92e8dcce1893848b42d

Contents?: true

Size: 1.88 KB

Versions: 1

Compression:

Stored size: 1.88 KB

require_relative './lines'

module Gorillib
  module Model

    module LoadFromTsv
      extend  Gorillib::Concern
      include LoadLines

      included do |klass|
        # Class-level default options; `_each_from_tsv` merges its per-call
        # options over these. Change them by assigning a new hash (`+=`-style
        # reassignment), never by mutating the existing hash in place.
        # NOTE(review): presumably because the class_attribute value is shared
        # with other classes until reassigned -- confirm.
        klass.class_attribute :tsv_options
        klass.tsv_options = {}
      end

      module ClassMethods

        # Iterate a block over each line of a TSV file
        #
        # Each line is split on tabs in full: empty trailing fields are kept
        # ("1\t2\t\t\t" becomes ["1","2","","",""]) and surplus fields are
        # counted rather than folded into the last field, so an over-long line
        # is reported instead of silently accepted.
        #
        # @param filename handed through unchanged to `_each_raw_line`
        # @param options [Hash] merged over the class-level `tsv_options`.
        #   `:num_fields` is removed before the rest is passed to
        #   `_each_raw_line`; it is the set of acceptable field counts (anything
        #   responding to `include?`, typically a Range) or an Integer for an
        #   exact count. Defaults to exactly `fields.length`.
        # @raise [Gorillib::Model::RawDataMismatchError] if a line has too many or too few fields
        # @yield an object instantiated from each line in the file.
        def _each_from_tsv(filename, options={})
          options    = tsv_options.merge(options)
          num_fields = options.delete(:num_fields){ (fields.length .. fields.length) }
          # A bare count is shorthand for "exactly this many fields"
          num_fields = (num_fields .. num_fields) if num_fields.is_a?(Integer)
          #
          _each_raw_line(filename, options) do |line|
            # Negative limit: keep trailing empty fields AND split off every
            # field. A positive limit would hide extra fields inside the last
            # element, making the too-many-fields check impossible.
            tuple = line.split("\t", -1)
            unless num_fields.include?(tuple.length)
              raise Gorillib::Model::RawDataMismatchError, "yark, spurious fields: #{tuple.inspect}"
            end
            yield from_tuple(*tuple)
          end
        end

        # With a block, calls block on each object in turn.
        #
        # With no block, accumulates all the instances into the array it
        # returns. As opposed to the with-a-block case, the memory footprint of
        # this increases as the filesize does, so use caution with large files.
        #
        # The block is captured explicitly: `Proc.new` without a block of its
        # own no longer picks up the method's block on Ruby 3.0+.
        #
        # @param args forwarded unchanged to `_each_from_tsv` (filename, options)
        # @yield each object produced by `_each_from_tsv`
        # @return with no block, an array of this class' instances; with a
        #   block, whatever `_each_from_tsv` returns (not meaningful -- do not
        #   rely on it)
        def load_tsv(*args, &block)
          return _each_from_tsv(*args, &block) if block

          objs = []
          _each_from_tsv(*args){|obj| objs << obj }
          objs
        end

      end
    end

  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version	Path
gorillib-0.5.0	lib/gorillib/model/serialization/tsv.rb