Sha256: 3f0370e4621ea00f535488346e9f8a84fce52a4b0468844ac756412b2cf4f9d0

Contents?: true

Size: 866 Bytes

Versions: 18

Compression:

Stored size: 866 Bytes

Contents

module PragmaticSegmenter
  module Languages
    class Hindi
      # Hindi flavour of the segmentation process. It overrides two private
      # hooks so that they use the Hindi-specific helper classes declared in
      # this namespace.
      class Process < PragmaticSegmenter::Process
        private

        # Builds a Hindi SentenceBoundaryPunctuation for +txt+ (passed as the
        # +text:+ keyword) and returns whatever its #split produces.
        def sentence_boundary_punctuation(txt)
          splitter = PragmaticSegmenter::Languages::Hindi::SentenceBoundaryPunctuation.new(text: txt)
          splitter.split
        end

        # Returns the punctuation list exposed by a fresh Hindi Punctuation
        # instance via #punct.
        def punctuation_array
          hindi_punctuation = PragmaticSegmenter::Languages::Hindi::Punctuation.new
          hindi_punctuation.punct
        end
      end

      # Hindi cleaner: declares no behaviour of its own, so everything comes
      # from the PragmaticSegmenter::Cleaner superclass.
      class Cleaner < PragmaticSegmenter::Cleaner; end

      # Splits text into candidate sentences using Hindi terminators.
      class SentenceBoundaryPunctuation < PragmaticSegmenter::SentenceBoundaryPunctuation
        # Two lazy alternatives: the shortest run ending in one of
        # "।", "|", "!" or "?", otherwise the shortest run up to an end of
        # line. The second alternative can also match the empty string.
        SENTENCE_BOUNDARY = /.*?[।\|!\?]|.*?$/

        # Scans +text+ with SENTENCE_BOUNDARY and returns the matches.
        # NOTE(review): +text+ is not defined in this class; presumably it is
        # a String reader supplied by the superclass -- confirm.
        def split
          boundary_pattern = SENTENCE_BOUNDARY
          text.scan(boundary_pattern)
        end
      end

      # Supplies the characters treated as punctuation marks for Hindi text.
      class Punctuation < PragmaticSegmenter::Punctuation
        # Frozen because #punct returns this very array: without the freeze
        # any caller could mutate the shared constant in place and change the
        # list for every other user.
        PUNCT = ['।', '|', '.', '!', '?'].freeze

        # Returns the (frozen) PUNCT array; callers must treat it as
        # read-only and +dup+ it if they need a modifiable copy.
        def punct
          PUNCT
        end
      end
    end
  end
end

Version data entries

18 entries across 18 versions & 1 rubygems

Version Path
pragmatic_segmenter-0.3.1 lib/pragmatic_segmenter/languages/hindi.rb
pragmatic_segmenter-0.3.0 lib/pragmatic_segmenter/languages/hindi.rb
pragmatic_segmenter-0.2.0 lib/pragmatic_segmenter/languages/hindi.rb
pragmatic_segmenter-0.1.8 lib/pragmatic_segmenter/languages/hindi.rb
pragmatic_segmenter-0.1.7 lib/pragmatic_segmenter/languages/hindi.rb
pragmatic_segmenter-0.1.6 lib/pragmatic_segmenter/languages/hindi.rb
pragmatic_segmenter-0.1.5 lib/pragmatic_segmenter/languages/hindi.rb
pragmatic_segmenter-0.1.4 lib/pragmatic_segmenter/languages/hindi.rb
pragmatic_segmenter-0.1.3 lib/pragmatic_segmenter/languages/hindi.rb
pragmatic_segmenter-0.1.2 lib/pragmatic_segmenter/languages/hindi.rb
pragmatic_segmenter-0.1.1 lib/pragmatic_segmenter/languages/hindi.rb
pragmatic_segmenter-0.1.0 lib/pragmatic_segmenter/languages/hindi.rb
pragmatic_segmenter-0.0.9 lib/pragmatic_segmenter/languages/hindi.rb
pragmatic_segmenter-0.0.8 lib/pragmatic_segmenter/languages/hindi.rb
pragmatic_segmenter-0.0.7 lib/pragmatic_segmenter/languages/hindi.rb
pragmatic_segmenter-0.0.6 lib/pragmatic_segmenter/languages/hindi.rb
pragmatic_segmenter-0.0.5 lib/pragmatic_segmenter/languages/hindi.rb
pragmatic_segmenter-0.0.4 lib/pragmatic_segmenter/languages/hindi.rb