Sha256: 7d1b53abdb3ba0db371fd684fabf310032d8329786f36838a60b3e620a10a981
Contents?: true
Size: 1.69 KB
Versions: 1
Compression:
Stored size: 1.69 KB
Contents
# Splits plain text into sentences and hands them out one token at a time.
#
# Text is queued with #load_text, which cuts a file into sentences and each
# sentence into words and punctuation marks. #next_token then walks the queue
# in order, wrapping every piece in a Markov::Token.
class Markov::Parser
  # Raised by #load_text when the given path does not exist.
  # Inherits from StandardError so that a plain `rescue` catches it; it is
  # still an Exception, so existing `rescue Exception` handlers keep working.
  class FileNotFoundError < StandardError # :nodoc:
  end

  # Not raised inside this class.
  # NOTE(review): presumably raised by other parts of the library — confirm.
  class EmptyDictionaryError < StandardError # :nodoc:
  end

  # Punctuation mark => token kind. Insertion order is the order in which a
  # word is checked for each mark (",", "?", "!", then ".").
  PUNCTUATION_KINDS = {
    "," => :special,
    "?" => :stop,
    "!" => :stop,
    "." => :stop
  }.freeze
  private_constant :PUNCTUATION_KINDS

  # Sets up the splitting patterns and the empty sentence/token queues.
  def initialize
    # Sentence boundary: whitespace that directly follows ".", "?" or "!".
    @split_sentence = /(?<=[.?!])\s+/
    # Word boundary: whitespace, or a punctuation mark. The mark is captured,
    # so String#split keeps it as its own element.
    @split_words = /([,.?!])|[\s]/
    # Characters stripped from every sentence before it is split into words.
    @replace_chars = /[„':;_"()]/
    @unparsed_sentences = []
    @tokens = []
  end

  # Reads the file at +source+, splits it into sentences and queues each one
  # for #next_token.
  #
  # source:: path of the text file to read; its content is treated as UTF-8.
  #
  # Returns the array of sentences read from the file.
  # Raises FileNotFoundError if +source+ does not exist.
  def load_text(source)
    raise FileNotFoundError, "#{source} does not exist!" unless File.exist?(source)

    # File.read closes the file handle itself; force_encoding only re-tags
    # the bytes as UTF-8, it does not transcode them.
    text = File.read(source).force_encoding(Encoding::UTF_8)
    text.split(@split_sentence).each { |sentence| add_unparsed_sentence(sentence) }
  end

  # Returns the next Markov::Token, or nil once every queued sentence has
  # been consumed. Text loaded after a nil result is picked up by later calls.
  def next_token
    # Refill the buffer from the sentence queue whenever it runs dry. Looping
    # means a sentence that produces no tokens cannot be mistaken for the end
    # of the input.
    while @tokens.empty?
      words = @unparsed_sentences.shift
      return nil unless words

      @tokens = tokenize(words)
    end

    @tokens.shift
  end

  private

  # Converts the words of one sentence into tokens, dropping blank entries.
  def tokenize(words)
    words.each_with_object([]) do |word, tokens|
      next if word == ""

      mark, kind = PUNCTUATION_KINDS.find { |symbol, _| word.include?(symbol) }
      tokens << (mark ? Markov::Token.new(mark, kind) : Markov::Token.new(word, :word))
    end
  end

  # Strips the unwanted characters from +sentence+, splits it into words and
  # punctuation marks, and queues the result unless it is empty.
  def add_unparsed_sentence(sentence)
    # Cleans the string in place on purpose: #load_text passes freshly split
    # strings, and the array it returns then holds the cleaned sentences.
    sentence.gsub!(@replace_chars, "")
    words = sentence.split(@split_words)
    @unparsed_sentences << words unless words.empty?
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
markov-generator-0.10.0 | lib/markov/parser.rb |