lib/namor/namor.rb in namor-0.1.0 vs lib/namor/namor.rb in namor-0.2.0
- old
+ new
@@ -5,16 +5,28 @@
# Store +opts+ as this object's configuration, replacing any previously stored value.
# scrub indexes the stored value with :suppress to get the list of words to remove
# from names, so +opts+ must respond to [].
def config(opts)
@config = opts
end
- def extract(name)
- return [] if name.nil?
-
+ # Clean up a single name component.
+ # Returns the scrubbed String, or nil when +name+ is nil.
+ #
+ # name - raw name String (may be nil)
+ # opts - Hash; opts[:suppress] may list extra words to remove for this call,
+ #        in addition to the configured @config[:suppress] list
+ #
+ # Steps, applied in this order:
+ # * convert to uppercase
+ # * strip a leading run of two or more Z/X characters (frequently used as invalid-account prefixes)
+ # * remove any words that are in the suppression lists (matched literally, case-insensitively)
+ # * remove common suffixes (JR, SR, II, III, IV)
+ # * remove anything inside parentheses
+ # * replace periods with spaces; remove underscores, apostrophes, hyphens and a trailing comma
+ #   (interior commas are kept)
+ # * squeeze spaces & trim whitespace from ends
+ def scrub(name, opts = {})
suppression_list = @config[:suppress] || []
- suppression_re = suppression_list.join('|')
+ # escape each word so regex metacharacters in a suppression entry match literally
+ # instead of over-matching (e.g. "M.D") or raising RegexpError (e.g. "(RET")
+ suppression_re = (suppression_list + (opts[:suppress]||[])).compact.map { |word| Regexp.escape(word.to_s.upcase) }.join('|')
- detitled_name = name.upcase.gsub(/\b(#{suppression_re})\b/i, '').gsub(/\b(MD|JR|SR|I+|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/[_.'-]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
+ name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(/\b(#{suppression_re})\b/i, '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/\./, ' ').gsub(/[_'-]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
+ end
+
+ def extract(name, opts = {})
+ return [] if name.nil?
+
+ detitled_name = scrub(name, opts)
if detitled_name =~ /,/
# "last, first[ middle]"
lastname, firstname = detitled_name.split(/\s*,\s*/)
lastname.gsub!(/ /, '')