Sha256: 4e079dab12df00f36bee852ca2f73ee9becc0add9b1ebd67137d544edf3e7c33

Contents?: true

Size: 1.09 KB

Versions: 6

Compression:

Stored size: 1.09 KB

Contents

require 'cppjieba_rb/cppjieba_rb'
require 'cppjieba_rb/version'
require 'cppjieba_rb/segment'

# Namespace for the cppjieba bindings. Holds the locations of the bundled
# dictionary files and module-level convenience wrappers that delegate to a
# lazily constructed, shared CppjiebaRb::Internal instance.
module CppjiebaRb
  # Dictionary directory, expressed relative to the directory of this file.
  EXT_BASE = File.join(File.dirname(__FILE__), *%w[.. ext cppjieba dict])

  # Data files inside EXT_BASE. All five are handed, in this order, to the
  # CppjiebaRb::Internal constructor by .internal.
  DICT_PATH = File.join(EXT_BASE, 'jieba.dict.utf8')
  HMM_DICT_PATH = File.join(EXT_BASE, 'hmm_model.utf8')
  USER_DICT = File.join(EXT_BASE, 'user.dict.utf8')
  IDF_PATH = File.join(EXT_BASE, 'idf.utf8')
  STOP_WORD_PATH = File.join(EXT_BASE, 'stop_words.utf8')

  class << self
    # Forwards +str+ and +top_n+ to the shared backend's #extract_keyword and
    # returns whatever it returns.
    # NOTE(review): +top_n+ is presumably the maximum number of keywords to
    # return; confirm against the backend implementation.
    def extract_keyword(str, top_n)
      internal.extract_keyword(str, top_n)
    end

    # Builds a fresh CppjiebaRb::Segment from +opts+ (nil when omitted) on
    # every call and returns the result of its #segment for +str+.
    def segment(str, opts = nil)
      segmenter = CppjiebaRb::Segment.new(opts)
      segmenter.segment(str)
    end

    # Forwards +str+ to the shared backend's #segment_tag and returns its
    # result unchanged.
    def segment_tag(str)
      internal.segment_tag(str)
    end

    # Returns +arr+ without the elements the shared backend reports as stop
    # words. The backend is looked up per element, so an empty +arr+ never
    # triggers its construction.
    def filter_stop_word(arr)
      arr.reject { |word| internal.stop_word?(word) }
    end

    # Shared backend instance: created from the dictionary path constants on
    # first use and reused on every later call.
    def internal
      @backend ||= CppjiebaRb::Internal.new(
        DICT_PATH, HMM_DICT_PATH, USER_DICT, IDF_PATH, STOP_WORD_PATH
      )
    end
  end
end

Version data entries

6 entries across 6 versions & 1 rubygem

Version Path
cppjieba_rb-0.4.2 lib/cppjieba_rb.rb
cppjieba_rb-0.4.1 lib/cppjieba_rb.rb
cppjieba_rb-0.4.0 lib/cppjieba_rb.rb
cppjieba_rb-0.3.3 lib/cppjieba_rb.rb
cppjieba_rb-0.3.1 lib/cppjieba_rb.rb
cppjieba_rb-0.3.0 lib/cppjieba_rb.rb