lib/pragmatic_segmenter/languages/deutsch.rb in pragmatic_segmenter-0.1.0 vs lib/pragmatic_segmenter/languages/deutsch.rb in pragmatic_segmenter-0.1.1

- old
+ new

@@ -30,12 +30,23 @@
    NumberPeriodSpaceRule = Rule.new(/(?<=\s[0-9]|\s([1-9][0-9]))\.(?=\s)/, '∯')
    # Rubular: http://rubular.com/r/ityNMwdghj
    NegativeNumberPeriodSpaceRule = Rule.new(/(?<=-[0-9]|-([1-9][0-9]))\.(?=\s)/, '∯')

+   DE_MONTHS = ['Januar', 'Februar', 'März', 'April', 'Mai', 'Juni', 'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember']
+
    def replace
      super
      @text.apply(NumberPeriodSpaceRule).apply(NegativeNumberPeriodSpaceRule)
+     replace_period_in_deutsch_dates(@text)
+   end
+
+   def replace_period_in_deutsch_dates(txt)
+     DE_MONTHS.each do |month|
+       # Rubular: http://rubular.com/r/zlqgj7G5dA
+       txt.gsub!(/(?<=\d)\.(?=\s*#{Regexp.escape(month)})/, '∯')
+     end
+     txt
    end
  end

  class SingleLetterAbbreviation < PragmaticSegmenter::SingleLetterAbbreviation
    # Rubular: http://rubular.com/r/B4X33QKIL8