lib/smarter_csv.rb in smarter_csv-1.7.3 vs lib/smarter_csv.rb in smarter_csv-1.7.4

- old
+ new

@@ -372,28 +372,25 @@
         end
       end
       return false
     end

-    # raise exception if none is found
+    # If file has headers, then guesses column separator from headers.
+    # Otherwise guesses column separator from contents.
+    # Raises exception if none is found.
     def guess_column_separator(filehandle, options)
-      del = [',', "\t", ';', ':', '|']
-      n = Hash.new(0)
+      possible_delimiters = [',', "\t", ';', ':', '|']

-      5.times do
-        line = filehandle.readline(options[:row_sep])
-        del.each do |d|
-          n[d] += line.scan(d).count
-        end
-      rescue EOFError # short files
-        break
-      end
+      candidates = if options.fetch(:headers_in_file)
+        candidated_column_separators_from_headers(filehandle, options, possible_delimiters)
+      else
+        candidated_column_separators_from_contents(filehandle, options, possible_delimiters)
+      end

-      filehandle.rewind
-      raise SmarterCSV::NoColSepDetected if n.values.max == 0
+      raise SmarterCSV::NoColSepDetected if candidates.values.max == 0

-      col_sep = n.key(n.values.max)
+      candidates.key(candidates.values.max)
     end

     # limitation: this currently reads the whole file in before making a decision
     def guess_line_ending(filehandle, options)
       counts = {"\n" => 0, "\r" => 0, "\r\n" => 0}
@@ -522,8 +519,40 @@
         else
           result << [key, options[:duplicate_header_suffix], counts[key]].join
         end
       end
       result
+    end
+
+    private
+
+    def candidated_column_separators_from_headers(filehandle, options, delimiters)
+      candidates = Hash.new(0)
+      line = filehandle.readline(options[:row_sep])
+
+      delimiters.each do |d|
+        candidates[d] += line.scan(d).count
+      end
+
+      filehandle.rewind
+
+      candidates
+    end
+
+    def candidated_column_separators_from_contents(filehandle, options, delimiters)
+      candidates = Hash.new(0)
+
+      5.times do
+        line = filehandle.readline(options[:row_sep])
+        delimiters.each do |d|
+          candidates[d] += line.scan(d).count
+        end
+      rescue EOFError # short files
+        break
+      end
+
+      filehandle.rewind
+
+      candidates
     end
   end
 end