lib/ebooks/generator.rb in ebooks-0.0.1 vs lib/ebooks/generator.rb in ebooks-0.1.0

- old
+ new

module Ebooks
  # Builds a plain-text corpus from a Twitter archive CSV and produces
  # Markov-chain sentences from it through a MarkyMarkov dictionary.
  #
  # Constructing an instance does file I/O: the corpus file is written when
  # it is missing, and the dictionary is parsed and saved when its `.mmd`
  # file is missing.
  class Generator
    attr_accessor :dictionary

    # @param config [Hash] settings read with symbol keys:
    #   :tweets_csv_path  - path of the Twitter archive CSV to read
    #   :corpus_path      - path of the text corpus to write/read
    #   :dictionary_name  - dictionary name; "<name>.mmd" is the file checked
    def initialize(config)
      @tweets_csv_path = config[:tweets_csv_path]
      @corpus_path = config[:corpus_path]
      build_corpus
      @dictionary_name = config[:dictionary_name]
      @dictionary = build_dictionary
    end

    # Rewrites the corpus file from the tweets CSV, one tweet per line.
    #
    # Go to Twitter.com -> Settings -> Download Archive.
    # The tweets.csv file is in the top directory of that archive.
    #
    # Rows are written in reverse file order. Rows without a sixth column
    # are skipped instead of raising NoMethodError on nil.
    # NOTE(review): every row is processed, so a header row (if the CSV has
    # one) ends up in the corpus as well - confirm whether that is intended.
    def generate_twitter_corpus
      rows = CSV.parse(File.read(@tweets_csv_path))

      # Create a new clean file of text that acts as the seed for the Markov chains
      File.open(@corpus_path, 'w') do |file|
        rows.reverse_each do |row|
          raw_text = row[5]
          next if raw_text.nil?

          # Strip links and new lines
          tweet_text = raw_text.gsub(/(?:f|ht)tps?:\/[^\s]+/, '').gsub(/\n/, ' ')
          file.write("#{tweet_text}\n")
        end
      end
    end

    # Generates a new Markov tweet from the loaded dictionary.
    #
    # Asks the dictionary for two sentences, keeps only the text before the
    # first "#<" marker, then drops a trailing newline and the last character.
    #
    # @return [String] the generated text; empty when nothing was generated
    def generate_sentence
      generated = dictionary.generate_n_sentences(2)
      # `split` returns [] for an empty string, so coerce the nil to "".
      generated.split(/\#\</).first.to_s.chomp.chop
    end

    private

    # Writes the corpus file only when it does not exist yet.
    def build_corpus
      # File.exists? was removed in Ruby 3.2; File.exist? is the supported form.
      generate_twitter_corpus unless File.exist?(@corpus_path)
    end

    # Opens the dictionary; when no saved dictionary file exists yet, parses
    # the corpus into it and saves it first.
    #
    # @return [MarkyMarkov::Dictionary]
    def build_dictionary
      # Check before constructing, matching the original evaluation order.
      already_saved = File.exist?(dictionary_path)
      markov = MarkyMarkov::Dictionary.new(@dictionary_name)
      unless already_saved
        markov.parse_file(@corpus_path)
        markov.save_dictionary!
      end
      markov
    end

    # @return [String] file name checked for a previously saved dictionary
    def dictionary_path
      "#{@dictionary_name}.mmd"
    end
  end
end