Sha256: 9563feb76a4319cf90a0a77a720b237acbcd7507a9f8c5f8f3dce434f8d5825d

Contents?: true

Size: 1.56 KB

Versions: 1

Compression:

Stored size: 1.56 KB

Contents

module Ebooks
  class Generator

    attr_accessor :dictionary

    def initialize(config)
      @tweets_csv_path = config[:tweets_csv_path]
      @corpus_path     = config[:corpus_path]
      build_corpus
      @dictionary_name = config[:dictionary_name]
      @dictionary      = build_dictionary
    end

    def generate_twitter_corpus
      # Go to Twitter.com -> Settings -> Download Archive.
      # This tweets.csv file is in the top directory. Put it in the same directory as this script.
      csv_text = CSV.parse(File.read(@tweets_csv_path))

      # Create a new clean file of text that acts as the seed for your Markov chains
      File.open(@corpus_path, 'w') do |file|
        csv_text.reverse.each do |row|
          # Strip links and new lines
          tweet_text = row[5].gsub(/(?:f|ht)tps?:\/[^\s]+/, '').gsub(/\n/,' ')
          # Save the text
          file.write("#{tweet_text}\n")
        end
      end
    end

    def generate_sentence
      # Run when you want to generate a new Markov tweet
      dictionary.generate_n_sentences(2).split(/\#\</).first.chomp.chop
    end

    private

    def build_corpus
      unless File.exists?(@corpus_path)
        generate_twitter_corpus
      end
    end

    def build_dictionary
      if File.exists?(dictionary_path)
        MarkyMarkov::Dictionary.new(@dictionary_name)
      else
        markov = MarkyMarkov::Dictionary.new(@dictionary_name)
        markov.parse_file(@corpus_path)
        markov.save_dictionary!
        markov
      end
    end

    def dictionary_path
      "#{@dictionary_name}.mmd"
    end

  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
ebooks-0.1.0 lib/ebooks/generator.rb