lib/string_tools.rb in string_tools-1.0.0 vs lib/string_tools.rb in string_tools-1.1.0

- old
+ new

@@ -153,17 +153,25 @@ 'img' => %w(src width height style) }.freeze TAGS_WITHOUT_ATTRIBUTES = %w(b strong i em sup sub ul ol li blockquote br tr u caption thead s).freeze + # Public: Sanitize string + # str - String for sanitize + # attrs - Hash, custom attributes, defaults empty hash + # remove_contents - Set of string, tags to be removed + # protocols - Array of string, protocols using in css properties urls def sanitize(str, attrs = {}) # для корректного обрезания utf строчек режем через mb_chars # для защиты от перегрузки парсера пропускаем максимум 1 мегабайт текста # длина русского символа в utf-8 - 2 байта, 1Мб/2б = 524288 = 2**19 символов # длина по символам с перестраховкой, т.к. латинские символы(теги, например) занимают 1 байт str = str.mb_chars.slice(0..(2**19)).to_s + remove_contents = attrs.delete(:remove_contents) + protocols = attrs.delete(:protocols) || [] + # Мерджим добавочные теги и атрибуты attributes = TAGS_WITH_ATTRIBUTES.merge(attrs) elements = attributes.keys | TAGS_WITHOUT_ATTRIBUTES transformers = [LINK_NORMALIZER] @@ -171,12 +179,12 @@ Sanitize.fragment( str, :attributes => attributes, :elements => elements, - :css => {:properties => Sanitize::Config::RELAXED[:css][:properties]}, - :remove_contents => %w(style script), + :css => {:properties => Sanitize::Config::RELAXED[:css][:properties], protocols: protocols}, + :remove_contents => remove_contents || Set['style', 'script'], :allow_comments => false, :transformers => transformers ) end end @@ -189,19 +197,28 @@ case node.name when 'a' normalize_link node, 'href' when 'img' normalize_link node, 'src' + remove_links node, 'alt' end end private def normalize_link(node, attr_name) return unless node[attr_name] node[attr_name] = Addressable::URI.parse(node[attr_name]).normalize.to_s rescue Addressable::URI::InvalidURIError node.swap node.children + end + + def remove_links(node, attr_name) + return unless node[attr_name] + + node[attr_name] = node[attr_name].gsub(URI::DEFAULT_PARSER.make_regexp, '').squish + + node.remove_attribute(attr_name) if node[attr_name].empty? end end class IframeNormalizer def initialize(attributes)