Sha256: a8c0669ad0d3c94a65fdb32307420b1e395dcaa27d086fb3ef436e35b6f113d4
Contents?: true
Size: 781 Bytes
Versions: 1
Compression:
Stored size: 781 Bytes
Contents
# encoding: UTF-8

require File.expand_path('../../vendor/icu4j-53_1.jar', __FILE__)

java_import 'com.ibm.icu.util.ULocale'
java_import 'com.ibm.icu.text.BreakIterator'

# Splits text at the boundaries reported by ICU's word BreakIterator and
# joins the resulting pieces with the pilcrow character.
module Pilcrow
  class << self
    # Segments +text+ using the word-break rules of +locale+ and returns a
    # single string with a pilcrow inserted between consecutive segments.
    #
    # text   - the String to segment.
    # locale - a locale identifier String passed to ULocale.new.
    #
    # Returns a String; an empty +text+ yields an empty String.
    def process(text, locale)
      insert_markers(segment_text(text, locale))
    end

    private

    # Returns an Array of Strings, one per span between consecutive
    # boundaries reported by the word BreakIterator.
    def segment_text(text, locale)
      # BreakIterator reports boundaries as UTF-16 code unit offsets, while
      # Ruby indexes a UTF-8 String by character. The two disagree as soon as
      # the text contains a character outside the BMP (e.g. an emoji), so the
      # slicing is done on the Java string, which shares ICU's index space.
      java_text = text.to_java(:string)

      brkiter = BreakIterator.getWordInstance(ULocale.new(locale))
      brkiter.setText(java_text)

      start = brkiter.first
      segments = []

      # BreakIterator#next returns DONE once the last boundary was passed.
      until (stop = brkiter.next) == BreakIterator::DONE
        segments << java_text.substring(start, stop).to_s
        start = stop
      end

      segments
    end

    # Joins +segments+ with the pilcrow character (U+00B6).
    # See http://en.wikipedia.org/wiki/Pilcrow
    def insert_markers(segments)
      segments.join("\u00B6")
    end
  end
end
Version data entries
1 entry across 1 version & 1 rubygem
Version | Path |
---|---|
pilcrow-1.0.0 | lib/pilcrow.rb |