Sha256: a0ae7c10955d788170cf2551c6e5400373038bfd1879a0c23d856ff1a8c69030

Contents?: true

Size: 1.71 KB

Versions: 6

Compression:

Stored size: 1.71 KB

Contents

#encoding: UTF-8
module TextRank
  module Tokenizer

    # Regex character class (as a String, ready for interpolation into a
    # pattern) matching any single symbol that may prefix a number in Money.
    # Frozen so the shared constant cannot be mutated after load.
    CURRENCY_SYMBOLS = ('[' + [
      "\u00a4", # Generic Currency Symbol
      "\u0024", # Dollar Sign
      "\u00a2", # Cent Sign
      "\u00a3", # Pound Sterling
      "\u00a5", # Yen Symbol
      "\u20a3", # Franc Sign
      "\u20a4", # Lira Symbol
      "\u20a7", # Peseta Sign
      "\u20ac", # Euro Symbol
      "\u20B9", # Rupee Sign
      "\u20a9", # Won Sign
      "\u20b4", # Hryvnia Sign
      "\u20af", # Drachma Sign
      "\u20ae", # Tugrik Sign
      "\u20b0", # German Penny Sign
      "\u20b2", # Guarani Sign
      "\u20b1", # Peso Sign
      "\u20b3", # Austral Sign
      "\u20b5", # Cedi Sign
      "\u20ad", # Kip Sign
      "\u20aa", # New Sheqel Sign
      "\u20ab", # Dong Sign
      "\u0025", # Percent Sign
      "\u2030", # Per Mille Sign (per thousand)
    ].join + ']').freeze
    private_constant :CURRENCY_SYMBOLS # Do not expose this to avoid confusion

    ##
    # A tokenizer regex that preserves money or formatted numbers as a single token. The
    # symbol must come before the number. This currently supports 24 different symbols
    # (22 currency signs plus the percent and per mille signs):
    #
    # * ¤
    # * $
    # * ¢
    # * £
    # * ¥
    # * ₣
    # * ₤
    # * ₧
    # * €
    # * ₹
    # * ₩
    # * ₴
    # * ₯
    # * ₮
    # * ₰
    # * ₲
    # * ₱
    # * ₳
    # * ₵
    # * ₭
    # * ₪
    # * ₫
    # * %
    # * ‰
    #
    # Three layouts are accepted: an optional minus sign after the symbol, an optional
    # minus sign before the symbol, or the symbol and number wrapped in parentheses.
    #
    # The numeric part is matched by the Number pattern, which is defined elsewhere in
    # this namespace; presumably that is where the optional three digit comma separation
    # and optional decimals are handled (confirm against Number).
    ##
    Money = %r{
      (
        #{CURRENCY_SYMBOLS} \-? #{Number}      # $-45,231.21
        |
        \-? #{CURRENCY_SYMBOLS} #{Number}      # -$45,231.21
        |
        \( #{CURRENCY_SYMBOLS} #{Number} \)    # ($45,231.21)
      )
    }x

  end
end

Version data entries

6 entries across 6 versions & 1 rubygems

Version Path
text_rank-1.2.3 lib/text_rank/tokenizer/money.rb
text_rank-1.2.2 lib/text_rank/tokenizer/money.rb
text_rank-1.2.0 lib/text_rank/tokenizer/money.rb
text_rank-1.1.7 lib/text_rank/tokenizer/money.rb
text_rank-1.1.6 lib/text_rank/tokenizer/money.rb
text_rank-1.1.5 lib/text_rank/tokenizer/money.rb