Sha256: 90085138190728bfdee63fd3c349a388dd11c47ad07e46867f244817259808b7

Contents?: true

Size: 908 Bytes

Versions: 13

Compression:

Stored size: 908 Bytes

Contents

# -*- encoding : utf-8 -*-
require 'pragmatic_segmenter/languages'

module PragmaticSegmenter
  # Segments a text into an array of sentences.
  #
  # The language code is resolved to a language module through
  # Languages.get_language_by_code. When that module defines its own
  # Processor or Cleaner constant it is used; otherwise the Processor /
  # Cleaner constant visible from this class is used as the default.
  class Segmenter
    attr_reader :text, :language, :doc_type

    # @param text the text to segment. When it is nil (or false) nothing is
    #   initialised: every reader returns nil and #segment returns [].
    # @param language language code handed to Languages.get_language_by_code
    # @param doc_type passed through unchanged to the cleaner
    # @param clean when truthy the text is run through the cleaner before
    #   being stored; otherwise it is stored exactly as given
    def initialize(text:, language: 'en', doc_type: nil, clean: true)
      return unless text

      @language = language
      @language_module = Languages.get_language_by_code(language)
      @doc_type = doc_type
      @text =
        if clean
          cleaner.new(text: text, doc_type: @doc_type, language: @language_module).clean
        else
          text
        end
    end

    # Runs the processor over the stored text.
    #
    # @return the result of the processor's #process call, or [] when the
    #   segmenter was built without text
    def segment
      return [] unless @text

      processor.new(language: @language_module).process(text: @text)
    end

    private

    # Language-specific Processor when the language module defines one,
    # otherwise the default Processor.
    #
    # Only the errors that mean "no override available" trigger the fallback:
    # NameError (constant missing from the language module) and TypeError
    # (the language module is not a class/module, e.g. nil). Any other error
    # raised during the lookup is allowed to propagate instead of being masked.
    def processor
      @language_module::Processor
    rescue NameError, TypeError
      Processor
    end

    # Language-specific Cleaner when the language module defines one,
    # otherwise the default Cleaner. Same fallback rules as #processor.
    def cleaner
      @language_module::Cleaner
    rescue NameError, TypeError
      Cleaner
    end
  end
end

Version data entries

13 entries across 13 versions & 1 rubygems

Version Path
pragmatic_segmenter-0.3.17 lib/pragmatic_segmenter/segmenter.rb
pragmatic_segmenter-0.3.16 lib/pragmatic_segmenter/segmenter.rb
pragmatic_segmenter-0.3.15 lib/pragmatic_segmenter/segmenter.rb
pragmatic_segmenter-0.3.14 lib/pragmatic_segmenter/segmenter.rb
pragmatic_segmenter-0.3.13 lib/pragmatic_segmenter/segmenter.rb
pragmatic_segmenter-0.3.12 lib/pragmatic_segmenter/segmenter.rb
pragmatic_segmenter-0.3.10 lib/pragmatic_segmenter/segmenter.rb
pragmatic_segmenter-0.3.9 lib/pragmatic_segmenter/segmenter.rb
pragmatic_segmenter-0.3.8 lib/pragmatic_segmenter/segmenter.rb
pragmatic_segmenter-0.3.7 lib/pragmatic_segmenter/segmenter.rb
pragmatic_segmenter-0.3.6 lib/pragmatic_segmenter/segmenter.rb
pragmatic_segmenter-0.3.5 lib/pragmatic_segmenter/segmenter.rb
pragmatic_segmenter-0.3.4 lib/pragmatic_segmenter/segmenter.rb