lib/rbbt/util/tsv/parse.rb in rbbt-util-3.0.3 vs lib/rbbt/util/tsv/parse.rb in rbbt-util-3.1.0

- old
+ new

@@ -1,15 +1,15 @@ require 'rbbt/util/misc' +require 'progress-bar' class TSV def self.parse_fields(io, delimiter = "\t") return [] if io.nil? ## split with delimiter, do not remove empty fields = io.split(delimiter, -1) - fields end def self.parse_header(stream, sep = nil, header_hash = nil) sep = /\t/ if sep.nil? @@ -56,10 +56,11 @@ key_field, other_fields, more_options, line = TSV.parse_header(stream, options[:sep], options[:header_hash]) options = Misc.add_defaults options, more_options options = Misc.add_defaults options, + :monitor => false, :case_insensitive => false, :type => :double, :namespace => nil, :identifiers => nil, @@ -76,10 +77,12 @@ :fix => nil, :exclude => nil, :select => nil, :grep => nil + + monitor = Misc.process_options options, :monitor header_hash, sep, sep2 = Misc.process_options options, :header_hash, :sep, :sep2 key, fields = @@ -117,17 +120,37 @@ fix, exclude, select, grep = Misc.process_options options, :fix, :exclude, :select, :grep exclude ||= Misc.process_options options, :reject if options.include? :reject + if monitor and (stream.respond_to?(:size) or (stream.respond_to?(:stat) and stream.stat.respond_to? :size)) and stream.respond_to?(:pos) + size = case + when stream.respond_to?(:size) + stream.size + else + stream.stat.size + end + desc = "Parsing Stream" + step = 100 + if Hash === monitor + desc = monitor[:desc] if monitor.include? :desc + step = monitor[:step] if monitor.include? :step + end + progress_monitor = Progress::Bar.new(size, 0, step, desc) + else + progress_monitor = nil + end + #{{{ Process rest - data = {} + data = options[:persistence_data] || {} single = type.to_sym != :double max_cols = 0 while line do line.chomp! + progress_monitor.tick(stream.pos) if progress_monitor + if line.empty? or (exclude and exclude.call(line)) or (select and not select.call(line)) line = stream.gets @@ -185,11 +208,11 @@ case when type == :single data[id] = extra.first when type == :flat if data.include? id - data[id].concat extra + data[id] = data[id] + extra else data[id] = extra end else data[id] = extra @@ -259,10 +282,10 @@ end end end end - if keep_empty and max_cols > 0 + if keep_empty and max_cols > 0 and not Persistence::TSV === data data.each do |key, values| next if values =~ /__Ref:/ new_values = values max_cols.times do |i| if type == :double