Sha256: a05aa73fefc620ab5eacc4b0ef30599244383e7a5aa0de255a4d5c8b1df3b1fa
Contents?: true
Size: 754 Bytes
Versions: 3
Compression:
Stored size: 754 Bytes
Contents
module Masticate
  # Guesses which delimiter separates the columns of a delimited text stream
  # and reports the distinct per-line field counts found with that delimiter.
  #
  # The wrapped +file+ must respond to +each_line+ and +seek+ (File and
  # StringIO both do).
  class Sniffer
    # Delimiters considered by #find_col_sep. When several occur equally often
    # in the first line, the one listed first wins.
    CandidateDelimiters = [',', '|', "\t"].freeze

    attr_reader :file
    attr_reader :col_sep

    # @param file [#each_line, #seek] stream to inspect
    def initialize(file)
      @file = file
    end

    # Convenience wrapper: builds a sniffer for +file+ and runs #sniff.
    #
    # @param file [#each_line, #seek] stream to inspect
    # @return [Hash] see #sniff
    def self.sniff(file)
      new(file).sniff
    end

    # Detects the delimiter, stores it in #col_sep, and gathers field counts.
    #
    # @return [Hash] +:col_sep+ (the chosen delimiter) and +:field_counts+
    #   (distinct field counts, see #stats)
    def sniff
      @col_sep = find_col_sep
      { col_sep: col_sep, field_counts: stats }
    end

    # Picks the candidate delimiter that occurs most often in the first line.
    # The stream is repositioned to offset 0 afterwards.
    #
    # @return [String] one of CandidateDelimiters
    def find_col_sep
      # IO#lines no longer exists on Ruby 3+, so read through each_line. An
      # empty stream yields no line at all; treat it as an empty one.
      first_line = file.each_line.first || ''
      file.seek(0) # reset file pointer

      # The negated index makes ties resolve to the earlier candidate instead
      # of depending on sort order.
      best, = CandidateDelimiters.each_with_index.max_by do |delim, idx|
        [consider_delim(first_line, delim), -idx]
      end
      best
    end

    # Scores a delimiter by how many times it occurs in +line+.
    #
    # @param line [String]
    # @param delim [String]
    # @return [Integer]
    def consider_delim(line, delim)
      line.count(delim)
    end

    # Counts the fields on every line from the current stream position, using
    # #col_sep as the separator, and returns the distinct counts in order of
    # first appearance. The stream is repositioned to offset 0 afterwards so
    # the sniffer can be run again.
    #
    # @return [Array<Integer>]
    def stats
      # A limit of -1 keeps trailing empty fields, so a last line without a
      # newline is counted the same way as the newline-terminated lines.
      counts = file.each_line.map { |line| line.split(col_sep, -1).size }.uniq
      file.seek(0)
      counts
    end
  end
end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
masticate-0.0.3 | lib/masticate/sniffer.rb |
masticate-0.0.2 | lib/masticate/sniffer.rb |
masticate-0.0.1 | lib/masticate/sniffer.rb |