Sha256: 3fb4ea97cf7db3547be452c366b246f6203b5177d00595a0df8eca766d6a5cae

Contents?: true

Size: 639 Bytes

Versions: 9

Compression:

Stored size: 639 Bytes

Contents

module PragmaticSegmenter
  module Languages
    # Persian-specific segmentation settings. Everything not overridden here
    # comes from the included Languages::Common module.
    module Persian
      include Languages::Common

      # Lazily matches one of three alternatives: text up to and including a
      # single terminator (: . ! ? or the Arabic question mark ؟), text up to
      # the end of the string, or text up to the end of a line.
      SENTENCE_BOUNDARY_REGEX = /.*?[:\.!\?؟]|.*?\z|.*?$/

      # Punctuation marks listed for this language; the same five characters
      # that appear in the character class of SENTENCE_BOUNDARY_REGEX.
      Punctuations = ['?', '!', ':', '.', '؟'].freeze

      # Pattern: a colon with a digit directly on both sides (e.g. "10:30").
      # Paired with the '♭' placeholder.
      # NOTE(review): Rule is defined elsewhere; presumably Rule.new(pattern,
      # replacement) -- confirm against its definition.
      ReplaceColonBetweenNumbersRule = Rule.new(/(?<=\d):(?=\d)/, '♭')

      # Pattern: an Arabic comma (،) followed by one whitespace character, a
      # run of non-whitespace characters and another Arabic comma.
      # Paired with the '♬' placeholder.
      ReplaceNonSentenceBoundaryCommaRule = Rule.new(/،(?=\s\S+،)/, '♬')

      # Persian variant of the abbreviation replacer. The superclass name is
      # resolved before this constant exists, so it refers to an
      # AbbreviationReplacer defined outside this module.
      class AbbreviationReplacer < AbbreviationReplacer
        # Intentionally empty for Persian.
        # NOTE(review): presumably read by the parent class -- confirm there.
        SENTENCE_STARTERS = [].freeze

        private

        # Swaps every period that directly follows the abbreviation text +am+
        # for the '∯' placeholder, modifying +txt+ in place.
        #
        # +am+ is passed through Regexp.escape before being interpolated, so
        # regex metacharacters inside it are matched literally. Without this
        # the dots in an abbreviation such as "a.m" would match any character
        # and periods after unrelated words would be replaced as well, and an
        # unbalanced metacharacter would raise RegexpError.
        #
        # @param txt [String] text to scan; mutated via gsub!
        # @param am [String] abbreviation text that must immediately precede
        #   the period (any leading whitespace in it is matched literally)
        # @param index unused in this override
        # @param character_array unused in this override
        #   (NOTE(review): both presumably kept to match the parent's
        #   signature -- confirm against the parent class)
        # @return [String] the same +txt+ object, whether or not it changed
        def scan_for_replacements(txt, am, index, character_array)
          txt.gsub!(/(?<=#{Regexp.escape(am.to_s)})\./, '∯')
          # gsub! returns nil when nothing matched, so return txt explicitly.
          txt
        end
      end
    end
  end
end

Version data entries

9 entries across 9 versions & 1 rubygem

Version Path
pragmatic_segmenter-0.3.17 lib/pragmatic_segmenter/languages/persian.rb
pragmatic_segmenter-0.3.16 lib/pragmatic_segmenter/languages/persian.rb
pragmatic_segmenter-0.3.15 lib/pragmatic_segmenter/languages/persian.rb
pragmatic_segmenter-0.3.14 lib/pragmatic_segmenter/languages/persian.rb
pragmatic_segmenter-0.3.13 lib/pragmatic_segmenter/languages/persian.rb
pragmatic_segmenter-0.3.12 lib/pragmatic_segmenter/languages/persian.rb
pragmatic_segmenter-0.3.10 lib/pragmatic_segmenter/languages/persian.rb
pragmatic_segmenter-0.3.9 lib/pragmatic_segmenter/languages/persian.rb
pragmatic_segmenter-0.3.8 lib/pragmatic_segmenter/languages/persian.rb