Sha256: 7d1b53abdb3ba0db371fd684fabf310032d8329786f36838a60b3e620a10a981

Contents?: true

Size: 1.69 KB

Versions: 1

Compression:

Stored size: 1.69 KB

Contents

# Reads a text file, splits it into sentences and words, and hands the
# result out one Markov::Token at a time through #next_token.
class Markov::Parser

  # Punctuation marks that become tokens of their own, mapped to the token
  # kind handed to Markov::Token.new. Hash order is the matching priority.
  PUNCTUATION = {
    "," => :special,
    "?" => :stop,
    "!" => :stop,
    "." => :stop
  }.freeze

  def initialize
    # A sentence ends after ".", "?" or "!" followed by whitespace.
    @split_sentence = /(?<=[.?!])\s+/
    # Words are separated by whitespace or punctuation; the capture group
    # keeps the punctuation marks in the split result.
    @split_words = /([,.?!])|[\s]/
    # Characters stripped from every sentence before it is split into words.
    @replace_chars = /[„':;_"()]/

    @unparsed_sentences = [] # queue of word arrays, one per sentence
    @tokens = []             # tokens of the sentence currently being consumed
  end

  # Raised by #load_text when the given path does not exist.
  class FileNotFoundError < StandardError # :nodoc:
  end

  class EmptyDictionaryError < StandardError # :nodoc:
  end

  # Reads the file at +source+, treats its content as UTF-8, splits it into
  # sentences and queues every non-empty sentence for #next_token.
  #
  # Raises FileNotFoundError if +source+ does not exist.
  # Returns the array of sentences that were read.
  def load_text(source)
    raise FileNotFoundError, "#{source} does not exist!" unless File.exist?(source)

    # File.read closes the file handle again, unlike a bare File.open(...).read.
    text = File.read(source).force_encoding(Encoding::UTF_8)
    text.split(@split_sentence).each { |sentence| add_unparsed_sentence(sentence) }
  end

  # Returns the next Markov::Token, or nil once every queued sentence has
  # been consumed. Text loaded after a nil result is picked up by the next
  # call.
  def next_token
    # Refill from the sentence queue until a token is available, so that a
    # sentence yielding no tokens cannot be mistaken for the end of input.
    while @tokens.empty?
      words = @unparsed_sentences.shift
      return nil unless words

      @tokens.concat(tokens_for(words))
    end

    @tokens.shift
  end # end next_token

  private

  # Strips the unwanted characters from +sentence+ (in place), splits it into
  # words and queues the words unless the sentence turned out to be empty.
  def add_unparsed_sentence(sentence)
    sentence.gsub!(@replace_chars, "")
    words = sentence.split(@split_words)
    @unparsed_sentences << words unless words.empty?
  end # add_unparsed_sentence

  # Converts the words of one sentence into tokens: a word containing a
  # PUNCTUATION mark becomes that mark's token, blank words are dropped and
  # everything else becomes a :word token.
  def tokens_for(words)
    words.reject(&:empty?).map do |word|
      mark, kind = PUNCTUATION.find { |symbol, _| word.include?(symbol) }
      Markov::Token.new(mark || word, kind || :word)
    end
  end

end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
markov-generator-0.10.0 lib/markov/parser.rb