Sha256: a36151b26b6c895c23db1094f44499a0a8f2724bfa05579ff4e566193372c57d

Contents?: true

Size: 1.79 KB

Versions: 4

Compression:

Stored size: 1.79 KB

Contents

module Ebooks
  # Variable-order Markov chain over pre-tokenized text.
  #
  # The model maps every n-gram of length 1..depth (an Array of tokens) to
  # the list of tokens observed directly after it. Duplicates are kept in
  # that list, so sampling from it is weighted by observed frequency.
  class MarkovModel
    INTERIM = :interim # Special token marking newline/^/$ boundaries

    attr_accessor :tokens
    attr_reader :depth

    # Normalizes a raw token into the form stored in the model.
    #
    # @param token [String, Symbol, nil] raw token
    # @return [Object] INTERIM for nil, "\n" or an empty token; otherwise
    #   the token itself
    def represent(token)
      if token.nil? || token == "\n" || token.empty?
        INTERIM
      else
        token
      end
    end

    # Builds the model from tokenized input, replacing any previous state.
    #
    # @param tokenized [Enumerable<Array>] token arrays; each one is
    #   followed by an INTERIM boundary in the flattened token stream
    # @param depth [Integer] maximum n-gram length used as a lookup key
    # @return [MarkovModel] self
    def consume(tokenized, depth=2)
      @tokens = [INTERIM]
      @depth = depth

      tokenized.each do |sentence|
        # concat appends in place; `+=` would copy the whole stream for
        # every sentence.
        @tokens.concat(sentence)
        @tokens << INTERIM
      end

      @model = {}

      @tokens.each_with_index do |token, i|
        # Up to @depth tokens immediately preceding position i, oldest first.
        window_start = [i - @depth, 0].max
        prev_tokens = @tokens[window_start...i].map { |prev| represent(prev) }
        successor = represent(token)

        # Every suffix of the window is recorded, including n-grams that
        # contain or span an INTERIM boundary.
        1.upto(prev_tokens.length) do |j|
          (@model[prev_tokens.last(j)] ||= []) << successor
        end
      end

      self
    end

    # Extends a token sequence by repeatedly sampling a successor until
    # the INTERIM boundary token is drawn.
    #
    # Implemented as a loop so that long chains neither exhaust the call
    # stack nor copy the sequence on every step.
    #
    # @param tokens [Array] seed tokens; never mutated
    # @return [Array] the seed tokens followed by the generated tokens,
    #   without the terminating INTERIM
    # @raise [ArgumentError] if no recorded n-gram matches the end of the
    #   sequence, so no successor can be chosen
    def chain(tokens)
      result = tokens.dup

      loop do
        next_token = sample_successor(result)

        if next_token.nil?
          raise ArgumentError, "no known continuation for #{result.last(@depth).inspect}"
        end

        return result if next_token == INTERIM

        result << next_token
      end
    end

    # Generates text by chaining from a randomly chosen token that was
    # observed directly after an INTERIM boundary.
    #
    # @return [Object] whatever NLP.reconstruct returns for the generated
    #   tokens (NLP is defined outside this file; presumably the joined text)
    # @raise [ArgumentError] if the model has no recorded starting tokens,
    #   e.g. nothing was consumed or deserialized yet
    def generate
      starts = @model && @model[[INTERIM]]

      if starts.nil? || starts.empty?
        raise ArgumentError, "model is empty: consume or deserialize data before generating"
      end

      tokens = chain([starts.sample])
      NLP.reconstruct(tokens)
    end

    # @return [Hash] the n-gram table and depth under the string keys
    #   'model' and 'depth'
    def serialize
      { 'model' => @model,
        'depth' => @depth }
    end

    # Restores state from a hash shaped like the one #serialize returns.
    # The raw token stream (#tokens) is not part of that data and is left
    # untouched.
    #
    # @param data [Hash] hash with 'model' and 'depth' entries
    # @return [MarkovModel] self
    def deserialize(data)
      @model = data['model']
      @depth = data['depth']
      self
    end

    private

    # Samples a successor for the longest suffix of +tokens+ (at most
    # @depth long) that has an entry in the model.
    #
    # @param tokens [Array] current token sequence
    # @return [Object, nil] the sampled token, or nil if nothing matches
    def sample_successor(tokens)
      @depth.downto(1) do |n|
        next if tokens.length < n

        candidates = @model[tokens.last(n)]
        return candidates.sample if candidates
      end

      nil
    end
  end
end

Version data entries

4 entries across 4 versions & 1 rubygems

Version Path
twitter_ebooks-2.0.3 lib/twitter_ebooks/markov.rb
twitter_ebooks-2.0.2 lib/twitter_ebooks/markov.rb
twitter_ebooks-2.0.1 lib/twitter_ebooks/markov.rb
twitter_ebooks-2.0.0 lib/twitter_ebooks/markov.rb