lib/ebooks/generator.rb in ebooks-0.2.0 vs lib/ebooks/generator.rb in ebooks-0.2.1
- old
+ new
@@ -17,11 +17,14 @@
csv_text = CSV.read(@tweets_csv_path) # NOTE(review): no headers: option is passed, so a header row (if the file has one) is iterated like any other row - confirm
# Write a cleaned-up text file, one CSV row's text per line, that acts as the seed for your Markov chains
File.open(@corpus_path, 'w') do |file|
csv_text.reverse_each do |row|
- # Strip links and new lines
- tweet_text = row[5].gsub(/(?:f|ht)tps?:\/[^\s]+/, '').gsub(/\n/,' ')
+ tweet_text = row[5]
+ .gsub(/(?:f|ht)tps?:\/[^\s]+/, '') # Strip ftp/ftps/http/https links (everything up to the next whitespace)
+ .gsub(/\n/,' ') # Replace new lines with a space so each entry stays on one line
+ .gsub(/@[a-z0-9_]+/i, '') # Strip @usernames (case-insensitive)
+ .gsub(/[R|M]T/, '') # Strip RT/MT markers. NOTE(review): [R|M] is a character class, so "|T" and RT/MT inside longer words are removed too
# Save the cleaned text, terminated by a newline
file.write("#{tweet_text}\n")
end
end
end