lib/string_tools.rb in string_tools-1.0.0 vs lib/string_tools.rb in string_tools-1.1.0
- old
+ new
@@ -153,17 +153,25 @@
'img' => %w(src width height style)
}.freeze
TAGS_WITHOUT_ATTRIBUTES = %w(b strong i em sup sub ul ol li blockquote br tr u caption thead s).freeze
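+ # Public: Sanitize an HTML string
+ # str - String to sanitize
+ # attrs - Hash of custom tags and their allowed attributes, defaults to an empty hash; may also carry the two option keys below
+ # :remove_contents - Set of String tag names to be removed together with their contents (key of attrs, defaults to style and script)
+ # :protocols - Array of String protocols allowed in URLs of CSS properties (key of attrs, defaults to an empty array)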
def sanitize(str, attrs = {})
# To truncate UTF strings correctly, slice through mb_chars.
# To protect the parser from overload, let through at most 1 megabyte of text.
# A Russian character in UTF-8 is 2 bytes long, 1MB/2B = 524288 = 2**19 characters.
# The limit in characters errs on the safe side, since Latin characters (tags, for example) take 1 byte.
# NOTE(review): the range 0..(2**19) is inclusive, so up to 2**19 + 1 characters are kept.
str = str.mb_chars.slice(0..(2**19)).to_s
# Pull the option keys out of attrs so that only tag => attributes pairs reach the merge below.
# NOTE(review): Hash#delete modifies the hash object passed by the caller, so a reused
# options hash no longer contains :remove_contents / :protocols after the first call.
+ remove_contents = attrs.delete(:remove_contents)
+ protocols = attrs.delete(:protocols) || []
+
# Merge in the additional tags and attributes
attributes = TAGS_WITH_ATTRIBUTES.merge(attrs)
# Allowed elements: every tag that has an attribute list plus the attribute-less tags.
elements = attributes.keys | TAGS_WITHOUT_ATTRIBUTES
transformers = [LINK_NORMALIZER]
@@ -171,12 +179,12 @@
Sanitize.fragment(
str,
:attributes => attributes,
:elements => elements,
- :css => {:properties => Sanitize::Config::RELAXED[:css][:properties]},
- :remove_contents => %w(style script),
+ :css => {:properties => Sanitize::Config::RELAXED[:css][:properties], protocols: protocols},
+ :remove_contents => remove_contents || Set['style', 'script'],
:allow_comments => false,
:transformers => transformers
)
end
end
@@ -189,19 +197,28 @@
case node.name
when 'a'
normalize_link node, 'href'
when 'img'
normalize_link node, 'src'
+ remove_links node, 'alt'
end
end
private
# Replaces the value of node[attr_name] with its normalized URI string
# (parsed and normalized by Addressable::URI).
# node      - element whose attribute is rewritten (presumably a Nokogiri node - confirm against the caller)
# attr_name - String name of the attribute holding the URL ('href' or 'src' in the visible caller)
# Does nothing when the attribute is absent.
def normalize_link(node, attr_name)
return unless node[attr_name]
node[attr_name] = Addressable::URI.parse(node[attr_name]).normalize.to_s
rescue Addressable::URI::InvalidURIError
# The value is not a valid URI: swap the node for its own children,
# i.e. the element itself is dropped while its inner content is kept.
node.swap node.children
+ end
+
# Strips URL-looking substrings from node[attr_name] and removes the attribute
# entirely when nothing is left afterwards.
# node      - element whose attribute is cleaned (presumably a Nokogiri node - confirm against the caller)
# attr_name - String name of the attribute to clean ('alt' in the visible caller)
# Does nothing when the attribute is absent.
+ def remove_links(node, attr_name)
+ return unless node[attr_name]
+
# Every match of the URI parser's generic URI regexp is deleted; squish
# (an ActiveSupport String extension) then tidies the whitespace left behind.
+ node[attr_name] = node[attr_name].gsub(URI::DEFAULT_PARSER.make_regexp, '').squish
+
# An attribute that became empty is removed rather than kept as an empty string.
+ node.remove_attribute(attr_name) if node[attr_name].empty?
end
end
class IframeNormalizer
def initialize(attributes)