lib/pragmatic_segmenter/segmenter.rb in pragmatic_segmenter-0.3.3 vs lib/pragmatic_segmenter/segmenter.rb in pragmatic_segmenter-0.3.4
- old
+ new
@@ -2,26 +2,40 @@
require 'pragmatic_segmenter/languages'
module PragmaticSegmenter
# This class segments a text into an array of sentences.
class Segmenter
- include Languages
attr_reader :text, :language, :doc_type
- def initialize(text:, language: nil, doc_type: nil, clean: true)
+ def initialize(text:, language: 'en', doc_type: nil, clean: true)
return unless text
- @language = language || 'en'
+ @language = language
+ @language_module = Languages.get_language_by_code(language)
@doc_type = doc_type
if clean
- @text = cleaner_class.new(text: text, doc_type: @doc_type).clean
+ @text = cleaner.new(text: text, doc_type: @doc_type, language: @language_module).clean
else
@text = text
end
end
def segment
return [] unless @text
- process_class.new(text: @text, language: language_module).process
+ processor.new(language: @language_module).process(text: @text)
+ end
+
+ private
+
+ def processor
+ @language_module::Processor
+ rescue
+ Processor
+ end
+
+ def cleaner
+ @language_module::Cleaner
+ rescue
+ Cleaner
end
end
end