lib/namor/namor.rb in namor-0.1.0 vs lib/namor/namor.rb in namor-0.2.0

- old
+ new

@@ -5,16 +5,28 @@
 
   def config(opts)
     @config = opts
   end
 
-  def extract(name)
-    return [] if name.nil?
-
+  # clean up a single name component
+  # * output all converted to uppercase
+  # * strip leading ZZ+ or XX+ (frequently used as invalid-account prefixes)
+  # * remove any words that are in the user-provided suppression list
+  # * remove words from list of common suffixes (Jr, Sr etc)
+  # * remove anything inside parenthesis
+  # * remove punctuation
+  # * squeeze whitespace & trim spaces from ends
+  def scrub(name, opts = {})
     suppression_list = @config[:suppress] || []
-    suppression_re = suppression_list.join('|')
+    suppression_re = (suppression_list + (opts[:suppress]||[])).compact.map(&:upcase).join('|')
 
-    detitled_name = name.upcase.gsub(/\b(#{suppression_re})\b/i, '').gsub(/\b(MD|JR|SR|I+|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/[_.'-]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
+    name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(/\b(#{suppression_re})\b/i, '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/\./, ' ').gsub(/[_'-]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
+  end
+
+  def extract(name, opts = {})
+    return [] if name.nil?
+
+    detitled_name = scrub(name, opts)
     if detitled_name =~ /,/
       # "last, first[ middle]"
       lastname, firstname = detitled_name.split(/\s*,\s*/)
       lastname.gsub!(/ /, '')