Sha256: d8d89138539330cc00ff43748c6025c84c690a7b81af0746ae4b1c4df6f947fa

Contents?: true

Size: 1.38 KB

Versions: 1

Compression:

Stored size: 1.38 KB

Contents

require 'natto'

# Trigram (second-order Markov chain) text generator.
#
# Imported texts are tokenized and stored in one flat word list. Every text is
# terminated by END_OF_TEXT, and the index at which each text begins is
# recorded so that generation can start from the opening words of a randomly
# chosen text.
class Ranjax
  # Sentinel stored after the last word of every imported text.
  END_OF_TEXT = '__E__'.freeze

  # @param path [String, nil] file previously written by #save; when given,
  #   the word list and head indexes are restored from it.
  def initialize(path: nil)
    @words = []
    @head_idxs = []

    load(path) unless path.nil?
  end

  # Tokenizes +text+ with Natto::MeCab and appends the surface of every node
  # the parser yields, followed by END_OF_TEXT, to the word list.
  #
  # @param text [String] text to learn from
  # @return [Array<String>] the accumulated word list
  def import_text(text)
    surfaces = []
    Natto::MeCab.new.parse(text) { |node| surfaces << node.surface }
    surfaces << END_OF_TEXT

    # Remember where this text starts so it can seed generation later.
    @head_idxs << @words.size
    @words.concat(surfaces)
  end

  # Generates text by starting from the first two words of a randomly chosen
  # imported text and repeatedly appending a random word that followed the
  # current word pair somewhere in the imported texts.
  #
  # Generation stops when END_OF_TEXT is drawn, when no continuation exists,
  # or when appending the next word would make the result longer than +max+
  # characters. The two seed words are always emitted, so the result can
  # exceed +max+ if the seed alone is longer than +max+.
  #
  # @param max [Integer, nil] maximum length in characters; nil for no limit
  # @return [String] generated text; empty when nothing has been imported
  def generate_text(max: nil)
    head_idx = @head_idxs.sample
    return ''.dup if head_idx.nil?

    # Read the seed straight from the word list: a text that is too short to
    # form a trigram has no entry in the follower table but is still valid.
    t1 = @words[head_idx]
    t2 = @words[head_idx + 1]
    return ''.dup if t1.nil? || t1 == END_OF_TEXT
    # Never emit the sentinel or continue into the next imported text.
    return t1.dup if t2.nil? || t2 == END_OF_TEXT

    followers = follower_table
    dst_text = t1 + t2
    loop do
      candidates = followers[[t1, t2]]
      break if candidates.nil?

      next_word = candidates.sample

      break if max && dst_text.size + next_word.size > max
      break if next_word == END_OF_TEXT

      dst_text << next_word
      t1 = t2
      t2 = next_word
    end

    dst_text
  end

  # Writes the learned words and head indexes to +path+ in Marshal format.
  #
  # @param path [String] destination file
  # @return [Integer] number of bytes written
  # @raise [ArgumentError] if +path+ is nil or empty
  def save(path)
    raise ArgumentError, 'Bad Path' if path.nil? || path.to_s.empty?

    data = Marshal.dump(words: @words, head_idxs: @head_idxs)
    # Marshal output is binary; binwrite avoids newline/encoding translation.
    File.binwrite(path, data)
  end

  private

  # Restores state from a file written by #save.
  #
  # SECURITY: Marshal.load can instantiate arbitrary objects. Only pass paths
  # to files from a trusted source.
  #
  # @param path [String] file to read
  # @raise [KeyError] if the stored hash lacks :words or :head_idxs
  def load(path)
    data = Marshal.load(File.binread(path))
    @words = data.fetch(:words)
    @head_idxs = data.fetch(:head_idxs)
  end

  # Maps every consecutive word pair to the list of words that followed it.
  # Followers are kept with duplicates and in order of appearance, so sampling
  # from a list is weighted by how often each continuation occurred.
  def follower_table
    @words.each_cons(3).each_with_object({}) do |(w1, w2, w3), table|
      (table[[w1, w2]] ||= []) << w3
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
ranjax-0.1.0 lib/ranjax.rb