Sha256: 4c93a52bfac3424bb9b52a54647c907ff43fca71b985f78b620b342933dab4ce

Contents?: true

Size: 939 Bytes

Versions: 7

Compression:

Stored size: 939 Bytes

Contents

# -*- encoding : utf-8 -*-
# frozen_string_literal: true

require 'pragmatic_segmenter/languages'

module PragmaticSegmenter
  # Entry point for sentence segmentation: holds a piece of text (optionally
  # run through a cleaner first) and splits it into an array of sentences by
  # delegating to a processor class.
  #
  # Both the cleaner and the processor are looked up on the language module
  # returned by Languages.get_language_by_code; when that lookup fails, the
  # Cleaner / Processor constants visible from this namespace are used instead.
  class Segmenter
    attr_reader :text, :language, :doc_type

    # @param text     the text to segment; when nil/false nothing is stored
    #                 and #segment will return an empty array
    # @param language language code handed to Languages.get_language_by_code
    # @param doc_type passed through unchanged to the cleaner
    # @param clean    when truthy the text is run through the cleaner before
    #                 being stored; otherwise it is stored as given
    def initialize(text:, language: 'en', doc_type: nil, clean: true)
      return unless text

      @language = language
      @language_module = Languages.get_language_by_code(language)
      @doc_type = doc_type
      @text = clean ? scrub(text) : text
    end

    # Splits the stored text into sentences.
    #
    # @return the result of the processor's #process call, or an empty array
    #         when no text was stored at construction time
    def segment
      return [] unless @text

      sentence_splitter = processor.new(language: @language_module)
      sentence_splitter.process(text: @text)
    end

    private

    # Runs +raw+ through the selected cleaner class and returns the result.
    def scrub(raw)
      cleaner.new(text: raw, doc_type: @doc_type, language: @language_module).clean
    end

    # Processor class to use: the language module's own Processor if the
    # lookup succeeds, otherwise the Processor constant resolved from here.
    def processor
      @language_module::Processor
    rescue StandardError
      Processor
    end

    # Cleaner class to use: the language module's own Cleaner if the lookup
    # succeeds, otherwise the Cleaner constant resolved from here.
    def cleaner
      @language_module::Cleaner
    rescue StandardError
      Cleaner
    end
  end
end

Version data entries

7 entries across 7 versions & 1 rubygems

Version Path
pragmatic_segmenter-0.3.24 lib/pragmatic_segmenter/segmenter.rb
pragmatic_segmenter-0.3.23 lib/pragmatic_segmenter/segmenter.rb
pragmatic_segmenter-0.3.22 lib/pragmatic_segmenter/segmenter.rb
pragmatic_segmenter-0.3.21 lib/pragmatic_segmenter/segmenter.rb
pragmatic_segmenter-0.3.20 lib/pragmatic_segmenter/segmenter.rb
pragmatic_segmenter-0.3.19 lib/pragmatic_segmenter/segmenter.rb
pragmatic_segmenter-0.3.18 lib/pragmatic_segmenter/segmenter.rb