lib/rbbt/util/tsv/parse.rb in rbbt-util-3.0.3 vs lib/rbbt/util/tsv/parse.rb in rbbt-util-3.1.0
- old
+ new
@@ -1,15 +1,15 @@
require 'rbbt/util/misc'
+require 'progress-bar'
class TSV
# Splits one piece of delimited text into its fields.
#
# io        - the text to split; despite the name, the only method called on
#             it here is +split+ (presumably a String line — confirm against
#             callers).
# delimiter - separator handed to +split+; defaults to a tab character.
#
# Returns the Array produced by +split+, or an empty Array when +io+ is nil.
def self.parse_fields(io, delimiter = "\t")
return [] if io.nil?
## split with delimiter, do not remove empty
## (for a String receiver, the -1 limit keeps trailing empty fields)
fields = io.split(delimiter, -1)
-
fields
end
def self.parse_header(stream, sep = nil, header_hash = nil)
sep = /\t/ if sep.nil?
@@ -56,10 +56,11 @@
key_field, other_fields, more_options, line = TSV.parse_header(stream, options[:sep], options[:header_hash])
options = Misc.add_defaults options, more_options
options = Misc.add_defaults options,
+ :monitor => false,
:case_insensitive => false,
:type => :double,
:namespace => nil,
:identifiers => nil,
@@ -76,10 +77,12 @@
:fix => nil,
:exclude => nil,
:select => nil,
:grep => nil
+
+ monitor = Misc.process_options options, :monitor
header_hash, sep, sep2 =
Misc.process_options options, :header_hash, :sep, :sep2
key, fields =
@@ -117,17 +120,37 @@
fix, exclude, select, grep =
Misc.process_options options, :fix, :exclude, :select, :grep
exclude ||= Misc.process_options options, :reject if options.include? :reject
+ if monitor and (stream.respond_to?(:size) or (stream.respond_to?(:stat) and stream.stat.respond_to? :size)) and stream.respond_to?(:pos)
+ size = case
+ when stream.respond_to?(:size)
+ stream.size
+ else
+ stream.stat.size
+ end
+ desc = "Parsing Stream"
+ step = 100
+ if Hash === monitor
+ desc = monitor[:desc] if monitor.include? :desc
+ step = monitor[:step] if monitor.include? :step
+ end
+ progress_monitor = Progress::Bar.new(size, 0, step, desc)
+ else
+ progress_monitor = nil
+ end
+
#{{{ Process rest
- data = {}
+ data = options[:persistence_data] || {}
single = type.to_sym != :double
max_cols = 0
while line do
line.chomp!
+ progress_monitor.tick(stream.pos) if progress_monitor
+
if line.empty? or
(exclude and exclude.call(line)) or
(select and not select.call(line))
line = stream.gets
@@ -185,11 +208,11 @@
case
when type == :single
data[id] = extra.first
when type == :flat
if data.include? id
- data[id].concat extra
+ data[id] = data[id] + extra
else
data[id] = extra
end
else
data[id] = extra
@@ -259,10 +282,10 @@
end
end
end
end
- if keep_empty and max_cols > 0
+ if keep_empty and max_cols > 0 and not Persistence::TSV === data
data.each do |key, values|
next if values =~ /__Ref:/
new_values = values
max_cols.times do |i|
if type == :double