lib/twitter_ebooks/model.rb in twitter_ebooks-2.0.4 vs lib/twitter_ebooks/model.rb in twitter_ebooks-2.0.5
- old
+ new
@@ -38,19 +38,12 @@
   sentences = NLP.sentences(text)
 
   log "Tokenizing #{sentences.length} sentences"
   @sentences = sentences.map { |sent| NLP.tokenize(sent) }
 
-  log "Building markov model"
-  @markov = MarkovModel.build(@sentences)
-
   log "Ranking keywords"
-  require 'benchmark'
-  puts Benchmark.measure {
-    @keywords = NLP.keywords(@sentences)
-    p @keywords.top(100)
-  }
+  @keywords = NLP.keywords(@sentences)
 
   self
 end
 
 def save(path)
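
In 2.0.5, consume no longer builds the markov model up front, and the Benchmark/debug scaffolding around keyword ranking is gone; model construction now happens lazily at generation time (see the markov_statement hunk below). A minimal usage sketch of the new flow, assuming the Ebooks::Model class implied by this file path ("corpus.txt" and "model.model" are hypothetical paths):

    # consume now only tokenizes sentences and ranks keywords,
    # so processing a large corpus is cheaper up front
    model = Ebooks::Model.new.consume("corpus.txt")
    model.save("model.model")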
@@ -73,14 +66,14 @@
   NLP.htmlentities.decode tweet
 end
 
 def markov_statement(limit=140, markov=nil)
-  markov ||= @markov
+  markov ||= MarkovModel.build(@sentences)
   tweet = ""
 
-  while (tweet = markov.generate) do
+  while (tweet = markov.generate(@sentences)) do
     next if tweet.length > limit
     next if NLP.unmatched_enclosers?(tweet)
     break if tweet.length > limit*0.4 || rand > 0.8
   end
 
   fix tweet
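
markov_statement now accepts an optional prebuilt model and otherwise constructs one on demand from @sentences; markov.generate also takes the sentence list in 2.0.5. A hedged sketch of the two call paths, assuming `model` is a consumed Ebooks::Model and `tokenized_sentences` is an array of tokenized sentences (both names are hypothetical):

    # default path: a MarkovModel is built from @sentences on each call
    model.markov_statement(140)
    # injected path: a caller-built (e.g. smaller, topical) model is reused
    topical = MarkovModel.build(tokenized_sentences)
    model.markov_statement(140, topical)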
@@ -110,12 +103,9 @@
 # Generates a response by looking for related sentences
 # in the corpus and building a smaller markov model from these
 def markov_response(input, limit=140)
   # First try
   relevant, slightly_relevant = relevant_sentences(input)
-
-  p relevant
-  p slightly_relevant.length
   if relevant.length >= 3
     markov = MarkovModel.new.consume(relevant)
     markov_statement(limit, markov)
   elsif slightly_relevant.length > 5
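
The only change to markov_response is dropping the two `p` debug prints; the logic the comment describes is unchanged: try a model built from closely relevant sentences first, then fall back to slightly relevant ones (the fallback body is cut off in this hunk). A usage sketch, assuming a consumed Ebooks::Model and a hypothetical input string:

    # builds a small topical model from sentences related to the input,
    # then generates a statement from it within the length limit
    reply = model.markov_response("what do you think of ruby?", 140)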