Sha256: 567c0131bd8eeaa11d9f9d64e9a3cfcce3c48d2fce0fad73527531d7a7869a73

Contents?: true

Size: 1.46 KB

Versions: 3

Compression:

Stored size: 1.46 KB

Contents

require 'llt'
require 'llt/core/api'
require 'llt/segmenter/api'
require 'llt/tokenizer/api'
require 'sinatra/base'
require 'sinatra/respond_with'

# Sinatra application exposing a combined segmenting + tokenizing endpoint.
class Api < Sinatra::Base
  helpers LLT::Core::Api::Helpers
  register Sinatra::RespondWith

  # Number of worker threads used when THREADS_FOR_LLT is not set.
  DEFAULT_THREADS = 4

  # GET /segtok
  #
  # Segments the submitted text into sentences, tokenizes the sentences on
  # several threads and responds with the result rendered as XML.
  # typecast_params!, extract_text and to_xml are not defined in this class;
  # presumably they come from LLT::Core::Api::Helpers.
  get '/segtok' do
    typecast_params!(params)
    text = extract_text(params)
    seg = LLT::Segmenter.new(params)
    tok = LLT::Tokenizer.new(params)
    sentences = seg.segment(text)
    if sentences.any?
      threads_count = ENV.fetch('THREADS_FOR_LLT', DEFAULT_THREADS).to_i
      # One thread per slice of sentences; every thread works with its own
      # forked tokenizer instead of sharing `tok`.
      threads = sentences.each_slice(slice_size(sentences, threads_count)).map do |sliced|
        Thread.new do
          forked_tok = tok.fork_instance
          process_segtok(forked_tok) do
            sliced.each do |sentence|
              forked_tok.tokenize(sentence.to_s, add_to: sentence)
            end
          end
        end
      end
      threads.each(&:join)
    end

    respond_to do |f|
      f.xml { to_xml(sentences, params) }
    end
  end

  # Computes how many sentences each worker thread should receive.
  #
  # @param sentences [#size] the collection that is going to be sliced
  # @param threads [Integer] the requested number of worker threads
  # @return [Integer] a slice size that is always >= 1, so it can be passed
  #   to each_slice safely
  def slice_size(sentences, threads)
    sent_size = sentences.size
    # A zero thread count (e.g. THREADS_FOR_LLT set to "0" or to a
    # non-numeric string) would raise ZeroDivisionError below. Treat it like
    # a negative count: put everything into a single slice.
    return [sent_size, 1].max if threads < 1

    size = sent_size / threads + 1
    size <= 0 ? sent_size : size
  end

  # Runs the given block on behalf of +tokenizer+.
  #
  # When the tokenizer's database type is :prometheus the block is executed
  # inside StemDatabase::Db.connection_pool.with_connection; otherwise it is
  # simply yielded to.
  #
  # @param tokenizer [#db] tokenizer whose db responds to #type
  # @yield the work to perform
  def process_segtok(tokenizer)
    return yield unless tokenizer.db.type == :prometheus

    begin
      StemDatabase::Db.connection_pool.with_connection { yield }
    ensure
      # This should NOT be needed, the block above should solve that.
      # I have no clue why the connections don't close by themselves...
      # Done in an ensure so the connection is also closed when the block
      # raises.
      StemDatabase::Db.connection.close
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
llt-0.0.3 lib/llt/api.rb
llt-0.0.2 lib/llt/api.rb
llt-0.0.1 lib/llt/api.rb