Sha256: 4e43ca90ba84f4bd3a3ee030a2e7c71ed2ccddebe1c2f7dc25d3f3781b7406d5

Contents?: true

Size: 1.42 KB

Versions: 13

Compression:

Stored size: 1.42 KB

Contents

require 'rubygems'
require 'rmmseg'
require 'ferret'

module RMMSeg
  module Ferret
    # The Analyzer class can be used with Ferret.
    class Analyzer < ::Ferret::Analysis::Analyzer
      # Build an Analyzer. If a block is supplied it is stored and later
      # called with every Tokenizer created by #token_stream, so the caller
      # can wrap the tokenizer in additional +TokenFilter+s. e.g.
      #
      #   analyzer = RMMSeg::Ferret::Analyzer.new { |tokenizer|
      #     Ferret::Analysis::LowerCaseFilter.new(tokenizer)
      #   }
      #
      def initialize(&brk)
        @brk = brk
      end

      # Create the token stream for +text+. The +field+ argument is accepted
      # but not consulted here.
      #
      # Returns the result of the stored block applied to a new Tokenizer
      # when a block was given at construction, otherwise the bare Tokenizer.
      def token_stream(field, text)
        tokenizer = Tokenizer.new(text)
        return tokenizer unless @brk

        @brk.call(tokenizer)
      end
    end

    # The Tokenizer splits text into tokens using RMMSeg::Algorithm.
    class Tokenizer < ::Ferret::Analysis::TokenStream
      # The text being tokenized
      attr_reader :text

      # Create a new Tokenizer that tokenizes +str+
      def initialize(str)
        self.text = str
      end

      # Fetch the next token.
      #
      # Returns +nil+ once the algorithm yields no further token. Otherwise
      # returns the Tokenizer's single Token instance, overwritten with the
      # text, start and end of the segment just produced (the same object is
      # handed back on every call).
      def next
        segment = @algor.next_token
        return nil if segment.nil?

        @token.text = segment.text
        @token.start = segment.start
        @token.end = segment.end
        @token
      end

      # Replace the text to be tokenized. This also resets the reusable
      # Token and builds a fresh Algorithm over the new text.
      def text=(str)
        @token = ::Ferret::Analysis::Token.new("", 0, 0)
        @text = str
        @algor = Algorithm.new(str)
      end
    end
  end
end

Version data entries

13 entries across 13 versions & 4 rubygems

Version Path
rmmseg-cpp-new-0.3.1 lib/rmmseg/ferret.rb
rmmseg-cpp-new-0.3.0 lib/rmmseg/ferret.rb
lijia-rmmseg-cpp-10.2.9.2 lib/rmmseg/ferret.rb
rmmseg-cpp-traditional-2.0.4 lib/rmmseg/ferret.rb
rmmseg-cpp-traditional-2.0.2 lib/rmmseg/ferret.rb
rmmseg-cpp-traditional-2.0.0 lib/rmmseg/ferret.rb
rmmseg-cpp-traditional-1.0.1 lib/rmmseg/ferret.rb
rmmseg-cpp-traditional-1.0.0 lib/rmmseg/ferret.rb
rmmseg-cpp-traditional-0.0.2 lib/rmmseg/ferret.rb
rmmseg-cpp-0.2.9 lib/rmmseg/ferret.rb
rmmseg-cpp-0.2.7 lib/rmmseg/ferret.rb
rmmseg-cpp-0.2.6 lib/rmmseg/ferret.rb
rmmseg-cpp-0.2.5 lib/rmmseg/ferret.rb