lib/peddler/flat_file_parser.rb in peddler-1.6.0 vs lib/peddler/flat_file_parser.rb in peddler-1.6.1
- old
+ new
@@ -9,47 +9,38 @@
include Headers
# http://stackoverflow.com/questions/8073920/importing-csv-quoting-error-is-driving-me-nuts
OPTIONS = { col_sep: "\t", quote_char: "\x00", headers: true }.freeze
- attr_reader :content, :summary, :encoding
+ attr_reader :content, :summary
def initialize(res, encoding)
super(res)
- @encoding = encoding
- extract_content
+ scrub_body!(encoding)
+ extract_content_and_summary
end
def parse(&blk)
- CSV.parse(scrub_content, OPTIONS, &blk) if content
+ CSV.parse(content, OPTIONS, &blk) if content
end
def records_count
- summarize if summary?
+ summarize if summary
end
def valid?
headers['Content-MD5'] == Digest::MD5.base64digest(body)
end
private
- def extract_content
- if summary?
- @summary, @content = body.split("\n\n")
- else
- @content = body.dup
- end
+ def scrub_body!(encoding)
+ body.force_encoding(encoding) unless body.encoding == 'UTF-8'
end
- def scrub_content
- content
- .force_encoding(encoding)
- .encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
- end
-
- def summary?
- body.include?("\n\n")
+ def extract_content_and_summary
+ @content = body.encode('UTF-8', invalid: :replace, undef: :replace)
+ @summary, @content = @content.split("\n\n") if @content.include?("\n\n")
end
def summarize
Hash[summary.split("\n\t")[1, 2].map { |line| line.split("\t\t") }]
end