lib/data_cleansing/cleaners.rb in data_cleansing-0.8.0 vs lib/data_cleansing/cleaners.rb in data_cleansing-0.9.0
- old
+ new
@@ -43,47 +43,56 @@
end
end
DataCleansing.register_cleaner(:remove_non_printable, RemoveNonPrintable)
# Replace escaped HTML entities (amp, quot, gt, lt, apos, nbsp) with the characters they represent
- module RemoveHTMLMarkup
+ module ReplaceHTMLMarkup
HTML_MARKUP = Regexp.compile(/&(amp|quot|gt|lt|apos|nbsp);/in)
def self.call(string)
return string unless string.is_a?(String)
string.gsub!(HTML_MARKUP) do |match|
case match.downcase
- when 'amp' then
+ when '&amp;' then
'&'
- when 'quot' then
+ when '&quot;' then
'"'
- when 'gt' then
+ when '&gt;' then
'>'
- when 'lt' then
+ when '&lt;' then
'<'
- when 'apos' then
+ when '&apos;' then
"'"
- when 'nbsp' then
+ when '&nbsp;' then
' '
else
"&#{match};"
end
end || string
end
end
- DataCleansing.register_cleaner(:remove_html_markup, RemoveHTMLMarkup)
+ DataCleansing.register_cleaner(:replace_html_markup, ReplaceHTMLMarkup)
- module ReplaceURIChars
+ module UnescapeURI
def self.call(string)
return string unless string.is_a?(String)
URI.unescape(string)
end
end
- DataCleansing.register_cleaner(:replace_uri_chars, ReplaceURIChars)
+ DataCleansing.register_cleaner(:unescape_uri, UnescapeURI)
+ module EscapeURI
+ def self.call(string)
+ return string unless string.is_a?(String)
+
+ URI.escape(string)
+ end
+ end
+ DataCleansing.register_cleaner(:escape_uri, EscapeURI)
+
# Compress multiple whitespace to a single space
module CompressWhitespace
WHITESPACE = Regexp.compile(/\s+/)
def self.call(string)
@@ -121,17 +130,41 @@
string.length > 0 ? string.to_i : nil
end
end
DataCleansing.register_cleaner(:string_to_integer, StringToInteger)
+ # Returns [Float] after removing all non-digit characters, except '.'
+ # Returns nil if nothing is left in the string once those characters are removed.
+ module StringToFloat
+ NUMERIC = Regexp.compile(/[^0-9\.]/)
+
+ def self.call(string)
+ return string unless string.is_a?(String)
+
+ # Remove Non-Digit Chars, except for '.'
+ string.gsub!(NUMERIC, '')
+ string.length > 0 ? string.to_f : nil
+ end
+ end
+ DataCleansing.register_cleaner(:string_to_float, StringToFloat)
+
# Convert a Date, a Time, or a String that Time.parse accepts to a Time at the end of that day (YYYY-MM-DD 23:59:59)
# Ex: 2015-12-31 becomes 2015-12-31 23:59:59
# If any other kind of object is passed in, it just passes through.
- module DateToTimeAtEndOfDay
- def self.call(date)
- return date unless date.kind_of?(Date)
-
- date.to_time.end_of_day
+ #
+ # Note: Only works if ActiveSupport is also loaded since it defines Time#end_of_day.
+ module EndOfDay
+ def self.call(datetime)
+ case datetime
+ when String
+ Time.parse(datetime).end_of_day
+ when Date
+ datetime.to_time.end_of_day
+ when Time
+ datetime.end_of_day
+ else
+ datetime
+ end
end
end
- DataCleansing.register_cleaner(:date_to_time_at_end_of_day, DateToTimeAtEndOfDay)
+ DataCleansing.register_cleaner(:end_of_day, EndOfDay)
end