Sha256: a0ae7c10955d788170cf2551c6e5400373038bfd1879a0c23d856ff1a8c69030
Contents?: true
Size: 1.71 KB
Versions: 6
Compression:
Stored size: 1.71 KB
Contents
# encoding: UTF-8
module TextRank
  module Tokenizer

    # Regex character class (kept as a String so it can be interpolated into
    # other patterns) listing every symbol that may prefix a number in Money.
    # Frozen so the shared constant cannot be mutated in place.
    CURRENCY_SYMBOLS = ('[' + [
      "\u00a4", # Generic Currency Symbol
      "\u0024", # Dollar Sign
      "\u00a2", # Cent Sign
      "\u00a3", # Pound Sterling
      "\u00a5", # Yen Symbol
      "\u20a3", # Franc Sign
      "\u20a4", # Lira Symbol
      "\u20a7", # Peseta Sign
      "\u20ac", # Euro Symbol
      "\u20B9", # Rupee
      "\u20a9", # Won Sign
      "\u20b4", # Hryvnia Sign
      "\u20af", # Drachma Sign
      "\u20ae", # Tugrik Sign
      "\u20b0", # German Penny Sign
      "\u20b2", # Guarani Sign
      "\u20b1", # Peso Sign
      "\u20b3", # Austral Sign
      "\u20b5", # Cedi Sign
      "\u20ad", # Kip Sign
      "\u20aa", # New Sheqel Sign
      "\u20ab", # Dong Sign
      "\u0025", # Percent Sign (not a currency, but accepted as a prefix)
      "\u2030", # Per Mille Sign (per thousand; not a currency, but accepted as a prefix)
    ].join + ']').freeze
    private_constant :CURRENCY_SYMBOLS # Do not expose this to avoid confusion

    ##
    # A tokenizer regex that preserves money or formatted numbers as a single token.
    # The number must be prefixed by one of 24 symbols: 22 currency symbols plus the
    # percent and per mille signs.
    #
    #   * ¤
    #   * $
    #   * ¢
    #   * £
    #   * ¥
    #   * ₣
    #   * ₤
    #   * ₧
    #   * €
    #   * ₹
    #   * ₩
    #   * ₴
    #   * ₯
    #   * ₮
    #   * ₰
    #   * ₲
    #   * ₱
    #   * ₳
    #   * ₵
    #   * ₭
    #   * ₪
    #   * ₫
    #   * %
    #   * ‰
    #
    # Three layouts are recognised, all with the symbol *before* the number:
    #
    #   * symbol, optional minus, number   ($-45,231.21)
    #   * optional minus, symbol, number   (-$45,231.21)
    #   * parenthesised symbol and number  (($45,231.21))
    #
    # A symbol that trails the number (e.g. "45%") is not matched by this pattern.
    #
    # The numeric part is delegated to +Number+, which is not defined in this file
    # (presumably TextRank::Tokenizer::Number -- it must be loaded first). Per the
    # original documentation it allows optional three digit comma separation and
    # optional decimals.
    #
    # The whole match is also available as capture group 1.
    ##
    Money = %r{
      (
        #{CURRENCY_SYMBOLS} \-? #{Number}         # $-45,231.21
        |
        \-? #{CURRENCY_SYMBOLS} #{Number}         # -$45,231.21
        |
        \( #{CURRENCY_SYMBOLS} #{Number} \)       # ($45,231.21)
      )
    }x

  end
end
Version data entries
6 entries across 6 versions & 1 rubygems