Sha256: c73cac082a63eaf9ac6a73e9399c7c408e99ab02c63201fcb4e6dc108e890b5f

Contents?: true

Size: 769 Bytes

Versions: 5

Compression:

Stored size: 769 Bytes

Contents

module Langusta
  # Collects the content seen while a particular tag is open and, when that
  # tag is closed, feeds the n-grams of the collected content into a profile.
  #
  # Content is only collected while the current +tag+ equals the +target+
  # supplied at construction time.
  class TagExtractor
    # Tag currently being processed; assigned by the caller and reset to nil
    # by #clear.
    attr_accessor :tag

    # count     - number of buffers that have been flushed into a profile
    # buffer    - elements collected so far for the current tag
    # target    - tag whose content is collected
    # threshold - a buffer must be strictly longer than this to be flushed
    attr_reader :count, :buffer, :target, :threshold

    # @param tag [Object] the tag whose content should be collected; stored
    #   as +target+ (the current +tag+ starts out as nil)
    # @param threshold [Integer] buffers whose length does not exceed this
    #   value are discarded by #close_tag instead of being profiled
    def initialize(tag, threshold)
      @target = tag
      @threshold = threshold
      @count = 0
      @buffer = []
      @tag = nil
    end

    # Appends +line+ to the buffer when the current tag is the target tag.
    #
    # @param line [Array, nil] elements to append; nil is ignored.
    #   NOTE(review): presumably an array of codepoints, since #close_tag
    #   passes each element to NGram#add_char - confirm against callers.
    # @return [Array, nil] the buffer when something was appended, otherwise nil
    def add(line)
      return unless @target == @tag && line

      # Append in place: `@buffer += line` would copy the whole buffer on
      # every call, which is quadratic over many additions.
      @buffer.concat(line)
    end

    # Forgets the current tag and starts a fresh, empty buffer.
    #
    # @return [Array] the new empty buffer
    def clear
      @tag = nil
      @buffer = []
    end

    # Finishes the current tag. When a profile is given, the current tag is
    # the target tag and the buffer is longer than the threshold, every
    # element of the buffer is pushed through a fresh NGram and each n-gram
    # of length 1..NGram::N_GRAM is added to the profile; +count+ is then
    # incremented. The extractor is cleared in every case.
    #
    # @param profile [#add, nil] receiver of the n-grams; nil skips profiling
    # @return [Array] the new empty buffer (result of #clear)
    def close_tag(profile)
      if profile && @tag == @target && @buffer.length > @threshold
        gram = NGram.new
        @buffer.each do |codepoint|
          gram.add_char(codepoint)
          1.upto(NGram::N_GRAM) { |n| profile.add(gram.get(n)) }
        end
        @count += 1
      end
      clear
    end
  end
end

Version data entries

5 entries across 5 versions & 1 rubygems

Version Path
langusta-0.2.4 lib/langusta/tag_extractor.rb
langusta-0.2.3 lib/langusta/tag_extractor.rb
langusta-0.2.2 lib/langusta/tag_extractor.rb
langusta-0.2.1 lib/langusta/tag_extractor.rb
langusta-0.2.0 lib/langusta/tag_extractor.rb