lib/loofah/html5/scrub.rb in loofah-2.0.3 vs lib/loofah/html5/scrub.rb in loofah-2.1.0.rc1

- old
+ new

@@ -1,14 +1,17 @@
 #encoding: US-ASCII
 
 require 'cgi'
+require 'crass'
 
 module Loofah
   module HTML5 # :nodoc:
     module Scrub
 
       CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
+      CSS_KEYWORDISH = /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
+      CRASS_SEMICOLON = {:node => :semicolon, :raw => ";"}
 
       class << self
 
         def allowed_element? element_name
           ::Loofah::HTML5::WhiteList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
@@ -59,38 +62,37 @@
         def scrub_css_attribute node
           style = node.attributes['style']
           style.value = scrub_css(style.value) if style
         end
 
-        # lifted nearly verbatim from html5lib
         def scrub_css style
-          # disallow urls
-          style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')
+          style_tree = Crass.parse_properties style
+          sanitized_tree = []
 
-          # gauntlet
-          return '' unless style =~ /\A([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/
-          return '' unless style =~ /\A\s*([-\w]+\s*:[^:;]*(;\s*|$))*\z/
-
-          clean = []
-          style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop, val|
-            next if val.empty?
-            prop.downcase!
-            if WhiteList::ALLOWED_CSS_PROPERTIES.include?(prop)
-              clean << "#{prop}: #{val};"
-            elsif WhiteList::SHORTHAND_CSS_PROPERTIES.include?(prop.split('-')[0])
-              clean << "#{prop}: #{val};" unless val.split().any? do |keyword|
-                !WhiteList::ALLOWED_CSS_KEYWORDS.include?(keyword) &&
-                  keyword !~ /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
+          style_tree.each do |node|
+            next unless node[:node] == :property
+            next if node[:children].any? do |child|
+              [:url, :bad_url, :function].include? child[:node]
+            end
+            name = node[:name].downcase
+            if WhiteList::ALLOWED_CSS_PROPERTIES.include?(name) || WhiteList::ALLOWED_SVG_PROPERTIES.include?(name)
+              sanitized_tree << node << CRASS_SEMICOLON
+            elsif WhiteList::SHORTHAND_CSS_PROPERTIES.include?(name.split('-').first)
+              value = node[:value].split.map do |keyword|
+                if WhiteList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH
+                  keyword
+                end
+              end.compact
+              unless value.empty?
+                propstring = sprintf "%s:%s", name, value.join(" ")
+                sanitized_node = Crass.parse_properties(propstring).first
+                sanitized_tree << sanitized_node << CRASS_SEMICOLON
               end
-            elsif WhiteList::ALLOWED_SVG_PROPERTIES.include?(prop)
-              clean << "#{prop}: #{val};"
             end
           end
 
-          style = clean.join(' ')
+          Crass::Parser.stringify sanitized_tree
         end
-
       end
-
     end
   end
 end