lib/ebooks/generator.rb in ebooks-0.0.1 vs lib/ebooks/generator.rb in ebooks-0.1.0
- old
+ new
@@ -1,29 +1,59 @@
module Ebooks
- module Generator
+ class Generator
- def self.generate_twitter_corpus(tweets_csv_path = 'tweets.csv', corpus_path = 'markov_dict.txt')
+ attr_accessor :dictionary
+
# Builds a generator from `config`, which is indexed with the symbol keys
# :tweets_csv_path, :corpus_path and :dictionary_name.
# Construction has side effects: it may write the corpus file and the
# saved dictionary (see build_corpus and build_dictionary).
+ def initialize(config)
+ @tweets_csv_path = config[:tweets_csv_path]
+ @corpus_path = config[:corpus_path]
# Runs before build_dictionary, which parses the file at @corpus_path
# when no saved dictionary is found.
+ build_corpus
+ @dictionary_name = config[:dictionary_name]
# Stored in @dictionary, which the `dictionary` accessor exposes.
+ @dictionary = build_dictionary
+ end
+
# Reads the CSV at @tweets_csv_path and writes one cleaned line per CSV row
# to @corpus_path, overwriting any existing file (mode 'w').
# Rows are written in the reverse of their order in the CSV.
# NOTE(review): no header handling is visible here, so a header row, if the
# CSV has one, is written to the corpus like any other row.
+ def generate_twitter_corpus
# Go to Twitter.com -> Settings -> Download Archive.
# This tweets.csv file is in the top directory. Put it in the same directory as this script.
- csv_text = CSV.parse(File.read(tweets_csv_path))
+ csv_text = CSV.parse(File.read(@tweets_csv_path))
# Create a new clean file of text that acts as the seed for your Markov chains
- File.open(corpus_path, 'w') do |file|
+ File.open(@corpus_path, 'w') do |file|
csv_text.reverse.each do |row|
# Strip links and new lines
# Column index 5 is presumably the tweet text in the archive CSV -- TODO confirm.
# NOTE(review): a row with fewer than six fields yields nil here and
# raises NoMethodError on gsub.
tweet_text = row[5].gsub(/(?:f|ht)tps?:\/[^\s]+/, '').gsub(/\n/,' ')
# Save the text
file.write("#{tweet_text}\n")
end
end
end
- def self.generate_sentence(corpus_path = 'markov_dict.txt')
# Returns generated text from the dictionary held in the `dictionary` accessor.
# Unlike the removed 0.0.1 body below, it neither parses the corpus nor saves
# the dictionary on each call.
+ def generate_sentence
# Run when you want to generate a new Markov tweet
- markov = MarkyMarkov::Dictionary.new('dictionary') # Saves/opens dictionary.mmd
- markov.parse_file(corpus_path)
- tweet_text = markov.generate_n_sentences(2).split(/\#\</).first.chomp.chop
- markov.save_dictionary!
# Requests two sentences, keeps only the text before the first "#<",
# then chomp drops a trailing line break and chop drops the last character.
# The "#<" cut presumably guards against object-inspect output leaking into
# the text -- TODO confirm.
+ dictionary.generate_n_sentences(2).split(/\#\</).first.chomp.chop
+ end
+
+ private
+
+ def build_corpus
+ unless File.exists?(@corpus_path)
+ generate_twitter_corpus
+ end
+ end
+
+ def build_dictionary
+ if File.exists?(dictionary_path)
+ MarkyMarkov::Dictionary.new(@dictionary_name)
+ else
+ markov = MarkyMarkov::Dictionary.new(@dictionary_name)
+ markov.parse_file(@corpus_path)
+ markov.save_dictionary!
+ markov
+ end
+ end
+
# File name checked for a saved dictionary: @dictionary_name followed by the
# ".mmd" extension.
+ def dictionary_path
+ "#{@dictionary_name}.mmd"
end
end
end