lib/pragmatic_segmenter/segmenter.rb in pragmatic_segmenter-0.0.3 vs lib/pragmatic_segmenter/segmenter.rb in pragmatic_segmenter-0.0.4

- old
+ new

@@ -15,67 +15,33 @@
 require 'pragmatic_segmenter/languages/french'
 require 'pragmatic_segmenter/languages/italian'
 require 'pragmatic_segmenter/languages/spanish'
 require 'pragmatic_segmenter/languages/russian'
 require 'pragmatic_segmenter/languages/japanese'
+require 'pragmatic_segmenter/languages/common'
+require 'pragmatic_segmenter/language_support'
 require 'pragmatic_segmenter/rules'
 
 module PragmaticSegmenter
   # This class segments a text into an array of sentences.
   class Segmenter
-    include Rules
+    include LanguageSupport
     attr_reader :text, :language, :doc_type
+
     def initialize(text:, **args)
-      return [] unless text
+      return unless text
       @language = args[:language] || 'en'
       @doc_type = args[:doc_type]
-      if args[:clean].eql?(false)
-        @text = text.dup
-      else
-        case @language
-        when 'en'
-          @text = PragmaticSegmenter::Languages::English::Cleaner.new(text: text.dup, doc_type: args[:doc_type]).clean
-        when 'ja'
-          @text = PragmaticSegmenter::Languages::Japanese::Cleaner.new(text: text.dup, doc_type: args[:doc_type]).clean
-        else
-          @text = PragmaticSegmenter::Cleaner.new(text: text.dup, doc_type: args[:doc_type]).clean
-        end
+      @text = text.dup
+
+      unless args[:clean].eql?(false)
+        @text = cleaner_class.new(text: @text, doc_type: args[:doc_type]).clean
       end
     end
 
     def segment
       return [] unless text
-      case language
-      when 'en'
-        PragmaticSegmenter::Process.new(text: text, doc_type: doc_type).process
-      when 'de'
-        PragmaticSegmenter::Languages::Deutsch::Process.new(text: text, doc_type: doc_type).process
-      when 'es'
-        PragmaticSegmenter::Languages::Spanish::Process.new(text: text, doc_type: doc_type).process
-      when 'it'
-        PragmaticSegmenter::Languages::Italian::Process.new(text: text, doc_type: doc_type).process
-      when 'ja'
-        PragmaticSegmenter::Languages::Japanese::Process.new(text: text, doc_type: doc_type).process
-      when 'el'
-        PragmaticSegmenter::Languages::Greek::Process.new(text: text, doc_type: doc_type).process
-      when 'ru'
-        PragmaticSegmenter::Languages::Russian::Process.new(text: text, doc_type: doc_type).process
-      when 'ar'
-        PragmaticSegmenter::Languages::Arabic::Process.new(text: text, doc_type: doc_type).process
-      when 'am'
-        PragmaticSegmenter::Languages::Amharic::Process.new(text: text, doc_type: doc_type).process
-      when 'hi'
-        PragmaticSegmenter::Languages::Hindi::Process.new(text: text, doc_type: doc_type).process
-      when 'hy'
-        PragmaticSegmenter::Languages::Armenian::Process.new(text: text, doc_type: doc_type).process
-      when 'fa'
-        PragmaticSegmenter::Languages::Persian::Process.new(text: text, doc_type: doc_type).process
-      when 'my'
-        PragmaticSegmenter::Languages::Burmese::Process.new(text: text, doc_type: doc_type).process
-      when 'ur'
-        PragmaticSegmenter::Languages::Urdu::Process.new(text: text, doc_type: doc_type).process
-      else
-        PragmaticSegmenter::Process.new(text: text, doc_type: doc_type).process
-      end
+
+      process_class.new(text: text, doc_type: doc_type).process
     end
   end
 end