bin/wu-lign in wukong-1.4.7 vs bin/wu-lign in wukong-1.4.9
- old
+ new
@@ -99,26 +99,30 @@
# simply reformatted according to the consensus of the initial
# FORMAT_GUESSING_LINES.
#
FORMAT_GUESSING_LINES = 500
# widest column to set
-MAX_MAX_WIDTH = 70
+MAX_MAX_WIDTH = 100
INT_RE = /\A\d+\z/
FLOAT_RE = /\A(\d+)(?:\.(\d+))?(?:e-?\d+)?\z/
-def consensus_type val, alltype
- return :mixed if alltype == :mixed
+def get_type val
case
when val == '' then type = nil
when val =~ INT_RE then type = :int
when val =~ FLOAT_RE then type = :float
- else type = :str end
- return if ! type
+ else type = :str end
+end
+
+def consensus_type val, alltype, is_first
+ return :mixed if alltype == :mixed
+ type = get_type(val) or return
case
- when alltype.nil? then type
- when alltype == type then type
+ when alltype.nil? then type
+ when is_first && (alltype == :str) then type
+ when alltype == type then type
when ( ((alltype==:float) && (type == :int)) || ((alltype == :int) && (type == :float)) )
:float
else :mixed
end
end
@@ -132,46 +136,56 @@
col_types = []
col_minmag = []
col_maxmag = []
rows = []
skip_col = []
+has_header = false
ARGV.each_with_index{|v,i| next if (v == '') ; maxw[i] = 0; skip_col[i] = true }
FORMAT_GUESSING_LINES.times do
line = $stdin.readline rescue nil
break unless line
- cols = line.chomp.split("\t").map{|s| s.strip }
- col_widths = cols.map{|col| col.length }
+ row = line.chomp.split("\t").map{|s| s.strip }
+ col_widths = row.map{|col| col.length }
col_widths.each_with_index{|cw,i| maxw[i] = [[cw,maxw[i]].compact.max, MAX_MAX_WIDTH].min }
- cols.each_with_index{|col,i|
+ row.each_with_index{|col,i|
next if skip_col[i]
- col_types[i] = consensus_type(col, col_types[i])
+ # Let the first row be text (headers)
+ col_types[i] = consensus_type(col, col_types[i], rows.length == 1)
if col_types[i] == :float
mantissa, radix = f_width(col)
col_minmag[i] = [radix, col_minmag[i], 1].compact.max
col_maxmag[i] = [mantissa, col_maxmag[i], 1].compact.max
end
}
- # p [maxw, col_types, col_minmag, col_maxmag, col_widths, cols]
- rows << cols
+ # p [rows.length, has_header, maxw, col_types, col_minmag, col_maxmag, col_widths, row]
+ has_header = true if row.all?{|col| get_type(col) == :str } && rows.length == 0
+ rows << row
end
format = maxw.zip(col_types, col_minmag, col_maxmag, ARGV).map do |width, type, minmag, maxmag, default|
next(lambda{|s| default % s rescue s }) if default.to_s != ''
case type
when :mixed, nil then lambda{|s| "%-#{width}s" % s }
when :str then lambda{|s| "%-#{width}s" % s }
when :int then lambda{|s| "%#{width}d" % s.to_i }
- when :float then lambda{|s| "%#{maxmag+minmag+1}.#{minmag}f" % s.to_f }
+ when :float then lambda{|s| "%#{maxmag+minmag+2}.#{minmag}f" % s.to_f }
else raise "oops type #{type}" end
end
-# p [maxw, col_types, col_minmag, col_maxmag, format]
+def dump_row row, format
+ puts row.zip(format).map{|c,f| f.call(c) rescue c }.join("\t")
+end
+def dump_header row, maxw
+ puts row.zip(maxw).map{|col, width| "%-#{width}s" % col.to_s }.join("\t")
+end
+
pad = [''] * maxw.length
+dump_header(rows.shift, maxw) if has_header
rows.each do |row|
# note -- strips trailing columns
- puts row.zip(format).map{|c,f| f.call(c) }.join("\t")
+ dump_row(row, format)
end
$stdin.each do |line|
- cols = line.chomp.split("\t").map{|s| s.strip }
+ row = line.chomp.split("\t").map{|s| s.strip }
# note -- strips trailing columns
- puts cols.zip(format).map{|c,f| f.call(c) rescue c }.join("\t")
+ dump_row(row, format)
end