Sha256: 282f429567ee4e9e5fac2e0442c765fbf184f54e6f4d62fd394f64bd6b80410b

Contents?: true

Size: 927 Bytes

Versions: 1

Compression:

Stored size: 927 Bytes

Contents

#!/usr/bin/env ruby

require "groonga"
require "gi"

# Prints groups of related terms read from an Arrow stream file.
#
# Usage: raw-show-related-terms.rb DB_PATH ARROW_PATH
#
#   DB_PATH    - path of the Groonga database that contains the "Words" table
#   ARROW_PATH - path of the Arrow stream file to read
#
# For each record batch, column 0 is used as a record ID in "Words" and
# column 1 as the score of that record. Batches that yield fewer than two
# terms are skipped; the others are printed with Kernel#p as
# [[term, score], ...].
abort("Usage: #{$0} DB_PATH ARROW_PATH") if ARGV.size < 2

db_path = ARGV[0]
# Despite its name, this file is only read by this script.
output_path = ARGV[1]

Arrow = GI.load("Arrow")
ArrowIO = GI.load("ArrowIO")
ArrowIPC = GI.load("ArrowIPC")

# The block form closes the database when the block is left, even on error.
Groonga::Database.open(db_path) do
  terms = Groonga["Words"]
  abort("Words table doesn't exist in #{db_path}") if terms.nil?

  input_stream = ArrowIO::MemoryMappedFile.open(output_path, :read)
  begin
    reader = ArrowIPC::StreamReader.open(input_stream)
    loop do
      record_batch = reader.next_record_batch
      # nil means that there are no more record batches in the stream.
      break if record_batch.nil?

      columns = record_batch.columns
      related_terms = []
      previous_score = nil
      # The early breaks below presumably rely on rows being ordered by
      # descending score -- TODO confirm against the producer of the file.
      record_batch.n_rows.times do |i|
        score = columns[1].get_value(i)
        # Stop at the first score under the fixed 0.1 threshold.
        break if score < 0.1
        # NOTE(review): previous_score is assigned only once, so it keeps the
        # first accepted score of the batch rather than the score of the
        # previous row. Confirm that this is intended.
        previous_score ||= score
        # Stop when the drop from that reference score exceeds half of the
        # current score.
        break if (previous_score - score) > (score / 2.0)
        term = Groonga::Record.new(terms, columns[0].get_value(i)).key
        related_terms << [term, score]
      end
      # A single term has nothing to be related to.
      next if related_terms.size < 2
      p related_terms
    end
  ensure
    input_stream.close
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
rabbit-slide-kou-nagoya-rubykaigi-03-2017.2.11.0 examples/raw-show-related-terms.rb