lib/data_cleansing/cleaners.rb in data_cleansing-0.9.0 vs lib/data_cleansing/cleaners.rb in data_cleansing-1.0.0

- old
+ new

@@ -1,6 +1,6 @@
-require 'uri'
+require 'cgi'
 module Cleaners
   # Strip leading and trailing whitespace
   module Strip
     def self.call(string)
       return string unless string.is_a?(String)
@@ -18,10 +18,20 @@
       string.upcase! || string
     end
   end
   DataCleansing.register_cleaner(:upcase, Upcase)
+  # Convert to downcase
+  module Downcase
+    def self.call(string)
+      return string unless string.is_a?(String)
+
+      string.downcase! || string
+    end
+  end
+  DataCleansing.register_cleaner(:downcase, Downcase)
+
   # Remove all non-word characters, including whitespace
   module RemoveNonWord
     NOT_WORDS = Regexp.compile(/\W/)
     def self.call(string)
@@ -42,11 +52,11 @@
       string.gsub!(NOT_PRINTABLE, '') || string
     end
   end
   DataCleansing.register_cleaner(:remove_non_printable, RemoveNonPrintable)
-  # Remove HTML Markup
+  # Unescape HTML Markup ( case-insensitive )
   module ReplaceHTMLMarkup
     HTML_MARKUP = Regexp.compile(/&(amp|quot|gt|lt|apos|nbsp);/in)
     def self.call(string)
       return string unless string.is_a?(String)
@@ -75,19 +85,19 @@
   module UnescapeURI
     def self.call(string)
       return string unless string.is_a?(String)
-      URI.unescape(string)
+      CGI.unescape(string)
     end
   end
   DataCleansing.register_cleaner(:unescape_uri, UnescapeURI)
   module EscapeURI
     def self.call(string)
       return string unless string.is_a?(String)
-      URI.escape(string)
+      CGI.escape(string)
     end
   end
   DataCleansing.register_cleaner(:escape_uri, EscapeURI)
   # Compress multiple whitespace to a single space