Sha256: a05aa73fefc620ab5eacc4b0ef30599244383e7a5aa0de255a4d5c8b1df3b1fa

Contents?: true

Size: 754 Bytes

Versions: 3

Compression:

Stored size: 754 Bytes

Contents

# Guesses the column delimiter of a delimited text input and reports the
# distinct per-line field counts found when splitting on that delimiter.
#
# The wrapped +file+ must respond to +each_line+ and +seek+
# (for example a File or StringIO).
class Masticate::Sniffer
  attr_reader :file
  attr_reader :col_sep

  # Delimiters considered, listed in order of preference: when two
  # candidates occur equally often, the earlier one is chosen.
  CandidateDelimiters = [',', '|', "\t"].freeze

  # @param file [#each_line, #seek] readable, seekable input to inspect
  def initialize(file)
    @file = file
  end

  # Convenience wrapper: builds a sniffer for +file+ and runs {#sniff}.
  #
  # @param file [#each_line, #seek]
  # @return [Hash] see {#sniff}
  def self.sniff(file)
    new(file).sniff
  end

  # Detects the delimiter, stores it in {#col_sep}, then scans the input
  # to collect field counts.
  #
  # @return [Hash] +:col_sep+ is the chosen delimiter; +:field_counts+ is
  #   the list of distinct field counts seen (see {#stats})
  def sniff
    @col_sep = find_col_sep
    {
      :col_sep => col_sep,
      :field_counts => stats
    }
  end

  # Picks the candidate delimiter that occurs most often in the first line
  # read from +file+, then seeks +file+ back to offset 0.
  #
  # An empty input is treated as an empty first line, so every candidate
  # scores zero and the first candidate is returned.
  #
  # @return [String] one of CandidateDelimiters
  def find_col_sep
    # IO#lines was removed in Ruby 3.0; each_line.first also stops after one
    # line instead of reading the whole input. nil (empty input) becomes "".
    line1 = file.each_line.first.to_s
    file.seek(0) # reset file pointer
    # max_by keeps the earliest maximal element, making ties deterministic.
    CandidateDelimiters.max_by { |delim| consider_delim(line1, delim) }
  end

  # Scores a delimiter for a line.
  #
  # Uses String#count, which counts characters belonging to the set named
  # by +delim+; for the single-character candidates above that is simply
  # the number of occurrences.
  #
  # @param line [String]
  # @param delim [String]
  # @return [Integer]
  def consider_delim(line, delim)
    line.count(delim)
  end

  # Splits every remaining line of +file+ on {#col_sep} and returns the
  # distinct field counts in order of first appearance. Reads from the
  # current file position and leaves the file positioned at its end.
  #
  # @return [Array<Integer>]
  def stats
    file.each_line.map { |line| line.split(col_sep).count }.uniq
  end
end

Version data entries

3 entries across 3 versions & 1 rubygem

Version Path
masticate-0.0.3 lib/masticate/sniffer.rb
masticate-0.0.2 lib/masticate/sniffer.rb
masticate-0.0.1 lib/masticate/sniffer.rb