lib/twitter_ebooks/model.rb in twitter_ebooks-2.2.5 vs lib/twitter_ebooks/model.rb in twitter_ebooks-2.2.6

- old
+ new

@@ -17,22 +17,23 @@ def self.load(path) Marshal.load(File.open(path, 'rb') { |f| f.read }) end def consume(path) - content = File.read(path) + content = File.read(path, :encoding => 'utf-8') @hash = Digest::MD5.hexdigest(content) if path.split('.')[-1] == "json" log "Reading json corpus from #{path}" lines = JSON.parse(content, symbolize_names: true).map do |tweet| tweet[:text] end elsif path.split('.')[-1] == "csv" log "Reading CSV corpus from #{path}" - header = CSV.read(path).first + content = CSV.parse(content) + header = content.shift text_col = header.index('text') - lines = CSV.read(path).drop(1).map do |tweet| + lines = content.map do |tweet| tweet[text_col] end else log "Reading plaintext corpus from #{path}" lines = content.split("\n")