lib/numerizer/numerizer.rb in pangel-chronic-0.3.0.3 vs lib/numerizer/numerizer.rb in pangel-chronic-0.3.10
- old
+ new
@@ -1,98 +1,97 @@
require 'strscan'
class Numerizer
  # Converts English number words found in free text into digits, e.g.
  # "twenty five" => "25", "one hundred and fifty" => "150",
  # "two and a half" => "2.5". Text that is not a number word is kept.
  #
  # Internally every converted number is tagged with a "<num>" marker so later
  # passes can tell converted numbers from digits that were already in the
  # input; the markers are stripped before the result is returned.

  # [regex source, replacement] pairs for words that map straight to a number.
  # The first element is interpolated into a case-insensitive regex, the second
  # is a gsub replacement string (so '\1' re-inserts the captured delimiter).
  DIRECT_NUMS = [
    ['eleven', '11'],
    ['twelve', '12'],
    ['thirteen', '13'],
    ['fourteen', '14'],
    ['fifteen', '15'],
    ['sixteen', '16'],
    ['seventeen', '17'],
    ['eighteen', '18'],
    ['nineteen', '19'],
    ['ninteen', '19'], # Common mis-spelling
    ['zero', '0'],
    ['one', '1'],
    ['two', '2'],
    ['three', '3'],
    ['four(\W|$)', '4\1'], # The weird regex is so that it matches four but not fourty
    ['five', '5'],
    ['six(\W|$)', '6\1'],
    ['seven(\W|$)', '7\1'],
    ['eight(\W|$)', '8\1'],
    ['nine(\W|$)', '9\1'],
    ['ten', '10'],
    # doesn't make sense for an 'a' at the end to be a 1
    # NOTE(review): inside a character class `\b` is a backspace and `^`/`$`
    # are literal characters, so as written this entry does not match an
    # ordinary standalone "a". Left unchanged on purpose: enabling it would
    # change how every text containing the article "a" is parsed.
    ['\ba[\b^$]', '1']
  ].freeze

  # [word, value] pairs for the multiples of ten.
  TEN_PREFIXES = [
    ['twenty', 20],
    ['thirty', 30],
    ['forty', 40],
    ['fourty', 40], # Common misspelling
    ['fifty', 50],
    ['sixty', 60],
    ['seventy', 70],
    ['eighty', 80],
    ['ninety', 90]
  ].freeze

  # [word, value] pairs for the magnitude words, smallest first. The order
  # matters: "hundred" must be folded in before "thousand", and so on.
  BIG_PREFIXES = [
    ['hundred', 100],
    ['thousand', 1000],
    ['million', 1_000_000],
    ['billion', 1_000_000_000],
    ['trillion', 1_000_000_000_000]
  ].freeze

  # Returns a copy of +string+ with English number words replaced by digits.
  # The argument itself is never modified (a frozen string is fine).
  def self.numerize(string)
    string = string.dup

    # preprocess
    # Collapse runs of spaces and turn hyphens between non-digits into spaces.
    # This will mutilate hyphenated-words but shouldn't matter for date extraction.
    string.gsub!(/ +|([^\d])-([^\d])/, '\1 \2')
    # take the 'a' out so it doesn't turn into a 1, save the half for the end
    string.gsub!(/a half/, 'haAlf')

    # easy/direct replacements
    DIRECT_NUMS.each do |dn|
      string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
    end

    # ten, twenty, etc. -- first the compound form ("twenty <num>5" => 25) ...
    TEN_PREFIXES.each do |tp|
      string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) { '<num>' + (tp[1] + $1.to_i).to_s }
    end

    # ... then whatever bare tens words are left ("twenty" => 20).
    TEN_PREFIXES.each do |tp|
      string.gsub!(/#{tp[0]}/i) { '<num>' + tp[1].to_s }
    end

    # hundreds, thousands, millions, etc.
    BIG_PREFIXES.each do |bp|
      string.gsub!(/(?:<num>)?(\d*)( *)#{bp[0]}/i) do
        digits = $1
        gap = $2
        if digits.empty?
          # A magnitude word with no number in front of it ("hundred") counts
          # once instead of being multiplied by zero; keep the spacing that
          # separated it from the preceding word.
          gap + '<num>' + bp[1].to_s
        else
          '<num>' + (bp[1] * digits.to_i).to_s
        end
      end
      # Fold "<num>100 and <num>50" style pairs before the next magnitude runs.
      andition(string)
    end

    # fractional addition
    # I'm not combining this with the previous block as using float addition complicates the strings
    # (with extraneous .0's and such )
    string.gsub!(/(\d+)(?: | and |-)*haAlf/i) { ($1.to_f + 0.5).to_s }

    string.gsub(/<num>/, '')
  end

  # Adds up adjacent tagged numbers in place: "<num>100 and <num>50" becomes
  # "<num>150". Two numbers separated by a plain space are only added when the
  # left one has more digits than the right one ("<num>100 <num>5"), so digit
  # sequences such as "<num>1 <num>2" are left alone.
  #
  # Works on character offsets (String#index / MatchData), so multibyte text
  # ahead of the match does not shift the replaced range.
  def self.andition(string)
    pattern = /<num>(\d+)( | and )<num>(\d+)(?=[^\w]|$)/i
    offset = 0
    while (start = string.index(pattern, offset))
      match = Regexp.last_match
      left = match[1]
      joiner = match[2]
      right = match[3]
      if joiner =~ /and/i || left.size > right.size
        string[start, match[0].size] = '<num>' + (left.to_i + right.to_i).to_s
        # The string changed under us: rescan from the beginning.
        offset = 0
      else
        offset = start + match[0].size
      end
    end
  end
  private_class_method :andition
end
\ No newline at end of file